|
{ |
|
"best_metric": 0.12174894318263237, |
|
"best_model_checkpoint": "Checkpoints/noisereduce_small_HLBTAugs/checkpoint-10612", |
|
"epoch": 7.0, |
|
"eval_steps": 1000, |
|
"global_step": 10612, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.016490765171503958, |
|
"grad_norm": 39.31369400024414, |
|
"learning_rate": 4.2000000000000006e-07, |
|
"loss": 6.8388, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.032981530343007916, |
|
"grad_norm": 33.235538482666016, |
|
"learning_rate": 9.200000000000001e-07, |
|
"loss": 6.0977, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04947229551451187, |
|
"grad_norm": 26.6918888092041, |
|
"learning_rate": 1.42e-06, |
|
"loss": 5.1518, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.06596306068601583, |
|
"grad_norm": 19.928937911987305, |
|
"learning_rate": 1.9200000000000003e-06, |
|
"loss": 4.1287, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08245382585751979, |
|
"grad_norm": 17.019432067871094, |
|
"learning_rate": 2.42e-06, |
|
"loss": 3.3885, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.09894459102902374, |
|
"grad_norm": 14.757500648498535, |
|
"learning_rate": 2.92e-06, |
|
"loss": 2.9706, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11543535620052771, |
|
"grad_norm": 15.232291221618652, |
|
"learning_rate": 3.4200000000000007e-06, |
|
"loss": 2.6453, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.13192612137203166, |
|
"grad_norm": 14.23778247833252, |
|
"learning_rate": 3.920000000000001e-06, |
|
"loss": 2.3353, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.14841688654353563, |
|
"grad_norm": 15.65247631072998, |
|
"learning_rate": 4.42e-06, |
|
"loss": 2.0593, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.16490765171503957, |
|
"grad_norm": 14.948330879211426, |
|
"learning_rate": 4.92e-06, |
|
"loss": 1.9178, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.18139841688654354, |
|
"grad_norm": 13.788339614868164, |
|
"learning_rate": 5.420000000000001e-06, |
|
"loss": 1.7265, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.19788918205804748, |
|
"grad_norm": 13.352944374084473, |
|
"learning_rate": 5.92e-06, |
|
"loss": 1.6424, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.21437994722955145, |
|
"grad_norm": 13.235891342163086, |
|
"learning_rate": 6.42e-06, |
|
"loss": 1.5053, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.23087071240105542, |
|
"grad_norm": 14.21623420715332, |
|
"learning_rate": 6.92e-06, |
|
"loss": 1.4925, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.24736147757255936, |
|
"grad_norm": 22.086210250854492, |
|
"learning_rate": 7.420000000000001e-06, |
|
"loss": 1.4107, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.2638522427440633, |
|
"grad_norm": 12.337775230407715, |
|
"learning_rate": 7.92e-06, |
|
"loss": 1.3594, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.28034300791556727, |
|
"grad_norm": 13.980206489562988, |
|
"learning_rate": 8.42e-06, |
|
"loss": 1.3478, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.29683377308707126, |
|
"grad_norm": 12.868133544921875, |
|
"learning_rate": 8.920000000000001e-06, |
|
"loss": 1.2952, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3133245382585752, |
|
"grad_norm": 12.454700469970703, |
|
"learning_rate": 9.42e-06, |
|
"loss": 1.3642, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.32981530343007914, |
|
"grad_norm": 14.960906982421875, |
|
"learning_rate": 9.920000000000002e-06, |
|
"loss": 1.1298, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.34630606860158314, |
|
"grad_norm": 14.04990291595459, |
|
"learning_rate": 9.99533125833704e-06, |
|
"loss": 1.1925, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.3627968337730871, |
|
"grad_norm": 12.418570518493652, |
|
"learning_rate": 9.9897732325478e-06, |
|
"loss": 1.1135, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.379287598944591, |
|
"grad_norm": 12.186723709106445, |
|
"learning_rate": 9.98421520675856e-06, |
|
"loss": 1.0462, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.39577836411609496, |
|
"grad_norm": 13.12454891204834, |
|
"learning_rate": 9.97865718096932e-06, |
|
"loss": 1.0761, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.41226912928759896, |
|
"grad_norm": 9.63180923461914, |
|
"learning_rate": 9.97309915518008e-06, |
|
"loss": 1.1031, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.4287598944591029, |
|
"grad_norm": 12.495417594909668, |
|
"learning_rate": 9.967541129390842e-06, |
|
"loss": 1.0276, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.44525065963060684, |
|
"grad_norm": 12.295565605163574, |
|
"learning_rate": 9.961983103601602e-06, |
|
"loss": 0.9776, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.46174142480211083, |
|
"grad_norm": 11.512835502624512, |
|
"learning_rate": 9.956425077812362e-06, |
|
"loss": 0.9664, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4782321899736148, |
|
"grad_norm": 14.376967430114746, |
|
"learning_rate": 9.950867052023122e-06, |
|
"loss": 1.0564, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.4947229551451187, |
|
"grad_norm": 11.573795318603516, |
|
"learning_rate": 9.945309026233882e-06, |
|
"loss": 1.0157, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.5112137203166227, |
|
"grad_norm": 13.335847854614258, |
|
"learning_rate": 9.939751000444643e-06, |
|
"loss": 0.9132, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.5277044854881267, |
|
"grad_norm": 11.932069778442383, |
|
"learning_rate": 9.934192974655403e-06, |
|
"loss": 0.9303, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5441952506596306, |
|
"grad_norm": 15.350811004638672, |
|
"learning_rate": 9.928634948866163e-06, |
|
"loss": 0.9654, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.5606860158311345, |
|
"grad_norm": 13.428604125976562, |
|
"learning_rate": 9.923076923076923e-06, |
|
"loss": 0.9238, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.5771767810026385, |
|
"grad_norm": 9.998970985412598, |
|
"learning_rate": 9.917518897287685e-06, |
|
"loss": 0.9136, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.5936675461741425, |
|
"grad_norm": 12.5803804397583, |
|
"learning_rate": 9.911960871498445e-06, |
|
"loss": 0.8368, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.6101583113456465, |
|
"grad_norm": 9.582605361938477, |
|
"learning_rate": 9.906402845709205e-06, |
|
"loss": 0.8865, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.6266490765171504, |
|
"grad_norm": 11.229839324951172, |
|
"learning_rate": 9.900844819919965e-06, |
|
"loss": 0.9401, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.6431398416886543, |
|
"grad_norm": 9.11830997467041, |
|
"learning_rate": 9.895286794130725e-06, |
|
"loss": 0.9186, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.6596306068601583, |
|
"grad_norm": 8.9396333694458, |
|
"learning_rate": 9.889728768341485e-06, |
|
"loss": 0.8388, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6761213720316622, |
|
"grad_norm": 11.35300064086914, |
|
"learning_rate": 9.884170742552245e-06, |
|
"loss": 0.8979, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.6926121372031663, |
|
"grad_norm": 11.368852615356445, |
|
"learning_rate": 9.878612716763007e-06, |
|
"loss": 0.8806, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.7091029023746702, |
|
"grad_norm": 9.17705249786377, |
|
"learning_rate": 9.873054690973767e-06, |
|
"loss": 0.7954, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.7255936675461742, |
|
"grad_norm": 9.339437484741211, |
|
"learning_rate": 9.867496665184527e-06, |
|
"loss": 0.8565, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.7420844327176781, |
|
"grad_norm": 13.083456039428711, |
|
"learning_rate": 9.861938639395287e-06, |
|
"loss": 0.8049, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.758575197889182, |
|
"grad_norm": 13.02919864654541, |
|
"learning_rate": 9.856380613606049e-06, |
|
"loss": 0.7948, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.775065963060686, |
|
"grad_norm": 8.956382751464844, |
|
"learning_rate": 9.850822587816809e-06, |
|
"loss": 0.802, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.7915567282321899, |
|
"grad_norm": 12.043669700622559, |
|
"learning_rate": 9.845264562027569e-06, |
|
"loss": 0.8199, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.808047493403694, |
|
"grad_norm": 10.119061470031738, |
|
"learning_rate": 9.839706536238329e-06, |
|
"loss": 0.7736, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.8245382585751979, |
|
"grad_norm": 9.62971019744873, |
|
"learning_rate": 9.83414851044909e-06, |
|
"loss": 0.7698, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.8410290237467019, |
|
"grad_norm": 14.902313232421875, |
|
"learning_rate": 9.82859048465985e-06, |
|
"loss": 0.7961, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.8575197889182058, |
|
"grad_norm": 13.862401008605957, |
|
"learning_rate": 9.823032458870609e-06, |
|
"loss": 0.7682, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.8740105540897097, |
|
"grad_norm": 10.040505409240723, |
|
"learning_rate": 9.81747443308137e-06, |
|
"loss": 0.777, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.8905013192612137, |
|
"grad_norm": 8.199767112731934, |
|
"learning_rate": 9.81191640729213e-06, |
|
"loss": 0.7698, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.9069920844327177, |
|
"grad_norm": 9.803958892822266, |
|
"learning_rate": 9.80635838150289e-06, |
|
"loss": 0.7456, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.9234828496042217, |
|
"grad_norm": 10.063946723937988, |
|
"learning_rate": 9.80080035571365e-06, |
|
"loss": 0.7653, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9399736147757256, |
|
"grad_norm": 11.48767375946045, |
|
"learning_rate": 9.795242329924412e-06, |
|
"loss": 0.7554, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.9564643799472295, |
|
"grad_norm": 10.616227149963379, |
|
"learning_rate": 9.789684304135172e-06, |
|
"loss": 0.7911, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.9729551451187335, |
|
"grad_norm": 8.222291946411133, |
|
"learning_rate": 9.784126278345932e-06, |
|
"loss": 0.7838, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.9894459102902374, |
|
"grad_norm": 9.053133964538574, |
|
"learning_rate": 9.778568252556692e-06, |
|
"loss": 0.7834, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.37832075357437134, |
|
"eval_runtime": 1456.4978, |
|
"eval_samples_per_second": 3.567, |
|
"eval_steps_per_second": 1.784, |
|
"eval_wer": 0.18685964032385183, |
|
"step": 1516 |
|
}, |
|
{ |
|
"epoch": 1.0059366754617414, |
|
"grad_norm": 7.5070929527282715, |
|
"learning_rate": 9.773010226767452e-06, |
|
"loss": 0.7013, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 1.0224274406332454, |
|
"grad_norm": 12.309554100036621, |
|
"learning_rate": 9.767452200978214e-06, |
|
"loss": 0.7185, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 1.0389182058047493, |
|
"grad_norm": 9.577189445495605, |
|
"learning_rate": 9.761894175188974e-06, |
|
"loss": 0.6881, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 1.0554089709762533, |
|
"grad_norm": 13.550950050354004, |
|
"learning_rate": 9.756336149399734e-06, |
|
"loss": 0.7469, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.0718997361477574, |
|
"grad_norm": 11.185826301574707, |
|
"learning_rate": 9.750778123610494e-06, |
|
"loss": 0.6697, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 1.0883905013192612, |
|
"grad_norm": 9.437973976135254, |
|
"learning_rate": 9.745220097821256e-06, |
|
"loss": 0.7037, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 1.1048812664907652, |
|
"grad_norm": 8.722063064575195, |
|
"learning_rate": 9.739662072032014e-06, |
|
"loss": 0.6956, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 1.121372031662269, |
|
"grad_norm": 8.723018646240234, |
|
"learning_rate": 9.734104046242774e-06, |
|
"loss": 0.736, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.1378627968337731, |
|
"grad_norm": 7.343624114990234, |
|
"learning_rate": 9.728546020453536e-06, |
|
"loss": 0.7095, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 1.154353562005277, |
|
"grad_norm": 8.930522918701172, |
|
"learning_rate": 9.722987994664296e-06, |
|
"loss": 0.6715, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.170844327176781, |
|
"grad_norm": 8.927308082580566, |
|
"learning_rate": 9.717429968875056e-06, |
|
"loss": 0.6891, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 1.187335092348285, |
|
"grad_norm": 9.455988883972168, |
|
"learning_rate": 9.711871943085816e-06, |
|
"loss": 0.6908, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.2038258575197889, |
|
"grad_norm": 10.517112731933594, |
|
"learning_rate": 9.706313917296578e-06, |
|
"loss": 0.6583, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 1.220316622691293, |
|
"grad_norm": 6.787125587463379, |
|
"learning_rate": 9.700755891507338e-06, |
|
"loss": 0.694, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.2368073878627968, |
|
"grad_norm": 9.443794250488281, |
|
"learning_rate": 9.695197865718098e-06, |
|
"loss": 0.6771, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 1.2532981530343008, |
|
"grad_norm": 8.916643142700195, |
|
"learning_rate": 9.689639839928858e-06, |
|
"loss": 0.7213, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.2697889182058049, |
|
"grad_norm": 9.56280517578125, |
|
"learning_rate": 9.68408181413962e-06, |
|
"loss": 0.6517, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 1.2862796833773087, |
|
"grad_norm": 10.584586143493652, |
|
"learning_rate": 9.67852378835038e-06, |
|
"loss": 0.6909, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.3027704485488127, |
|
"grad_norm": 7.714522361755371, |
|
"learning_rate": 9.67296576256114e-06, |
|
"loss": 0.6411, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 1.3192612137203166, |
|
"grad_norm": 8.497810363769531, |
|
"learning_rate": 9.6674077367719e-06, |
|
"loss": 0.6612, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.3357519788918206, |
|
"grad_norm": 7.779177188873291, |
|
"learning_rate": 9.66184971098266e-06, |
|
"loss": 0.6655, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 1.3522427440633247, |
|
"grad_norm": 8.202792167663574, |
|
"learning_rate": 9.65629168519342e-06, |
|
"loss": 0.6234, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.3687335092348285, |
|
"grad_norm": 6.34199857711792, |
|
"learning_rate": 9.65073365940418e-06, |
|
"loss": 0.6737, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 1.3852242744063323, |
|
"grad_norm": 9.55582332611084, |
|
"learning_rate": 9.645175633614941e-06, |
|
"loss": 0.6718, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.4017150395778364, |
|
"grad_norm": 9.783499717712402, |
|
"learning_rate": 9.639617607825701e-06, |
|
"loss": 0.6453, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 1.4182058047493404, |
|
"grad_norm": 7.693665027618408, |
|
"learning_rate": 9.634059582036461e-06, |
|
"loss": 0.631, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.4346965699208443, |
|
"grad_norm": 9.876138687133789, |
|
"learning_rate": 9.628501556247221e-06, |
|
"loss": 0.6951, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 1.4511873350923483, |
|
"grad_norm": 9.020912170410156, |
|
"learning_rate": 9.622943530457981e-06, |
|
"loss": 0.6479, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.4676781002638521, |
|
"grad_norm": 10.81850528717041, |
|
"learning_rate": 9.617385504668743e-06, |
|
"loss": 0.709, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 1.4841688654353562, |
|
"grad_norm": 9.466885566711426, |
|
"learning_rate": 9.611827478879503e-06, |
|
"loss": 0.6469, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.5006596306068603, |
|
"grad_norm": 10.120772361755371, |
|
"learning_rate": 9.606269453090263e-06, |
|
"loss": 0.6428, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 1.517150395778364, |
|
"grad_norm": 9.613449096679688, |
|
"learning_rate": 9.600711427301023e-06, |
|
"loss": 0.6808, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.533641160949868, |
|
"grad_norm": 6.854451656341553, |
|
"learning_rate": 9.595153401511785e-06, |
|
"loss": 0.674, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.550131926121372, |
|
"grad_norm": 7.6446309089660645, |
|
"learning_rate": 9.589595375722545e-06, |
|
"loss": 0.6497, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.566622691292876, |
|
"grad_norm": 9.535722732543945, |
|
"learning_rate": 9.584037349933303e-06, |
|
"loss": 0.6262, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.58311345646438, |
|
"grad_norm": 10.47003173828125, |
|
"learning_rate": 9.578479324144065e-06, |
|
"loss": 0.6381, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.599604221635884, |
|
"grad_norm": 9.593868255615234, |
|
"learning_rate": 9.572921298354825e-06, |
|
"loss": 0.6331, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 1.6160949868073877, |
|
"grad_norm": 9.041787147521973, |
|
"learning_rate": 9.567363272565585e-06, |
|
"loss": 0.6333, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.6325857519788918, |
|
"grad_norm": 10.436051368713379, |
|
"learning_rate": 9.561805246776345e-06, |
|
"loss": 0.6992, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 1.6490765171503958, |
|
"grad_norm": 10.629966735839844, |
|
"learning_rate": 9.556247220987107e-06, |
|
"loss": 0.6054, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.6655672823218999, |
|
"grad_norm": 9.051644325256348, |
|
"learning_rate": 9.550689195197867e-06, |
|
"loss": 0.6326, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 1.6820580474934037, |
|
"grad_norm": 10.509145736694336, |
|
"learning_rate": 9.545131169408627e-06, |
|
"loss": 0.5865, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.6985488126649075, |
|
"grad_norm": 10.23874568939209, |
|
"learning_rate": 9.539573143619387e-06, |
|
"loss": 0.6688, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 1.7150395778364116, |
|
"grad_norm": 9.89738655090332, |
|
"learning_rate": 9.534015117830148e-06, |
|
"loss": 0.6056, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.7315303430079156, |
|
"grad_norm": 7.859192848205566, |
|
"learning_rate": 9.528457092040908e-06, |
|
"loss": 0.6504, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.7480211081794197, |
|
"grad_norm": 6.974312782287598, |
|
"learning_rate": 9.522899066251668e-06, |
|
"loss": 0.6634, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.7645118733509235, |
|
"grad_norm": 8.40381145477295, |
|
"learning_rate": 9.517341040462428e-06, |
|
"loss": 0.562, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 1.7810026385224274, |
|
"grad_norm": 9.789514541625977, |
|
"learning_rate": 9.511783014673188e-06, |
|
"loss": 0.6344, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.7974934036939314, |
|
"grad_norm": 10.27668285369873, |
|
"learning_rate": 9.50622498888395e-06, |
|
"loss": 0.6181, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 1.8139841688654355, |
|
"grad_norm": 8.970308303833008, |
|
"learning_rate": 9.500666963094709e-06, |
|
"loss": 0.6289, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.8304749340369393, |
|
"grad_norm": 18.562963485717773, |
|
"learning_rate": 9.49510893730547e-06, |
|
"loss": 0.5785, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.8469656992084431, |
|
"grad_norm": 9.711603164672852, |
|
"learning_rate": 9.48955091151623e-06, |
|
"loss": 0.6709, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.8634564643799472, |
|
"grad_norm": 6.827445983886719, |
|
"learning_rate": 9.48399288572699e-06, |
|
"loss": 0.5618, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.8799472295514512, |
|
"grad_norm": 7.309730529785156, |
|
"learning_rate": 9.47843485993775e-06, |
|
"loss": 0.6147, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.8964379947229553, |
|
"grad_norm": 8.898188591003418, |
|
"learning_rate": 9.47287683414851e-06, |
|
"loss": 0.6617, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.912928759894459, |
|
"grad_norm": 9.802018165588379, |
|
"learning_rate": 9.467318808359272e-06, |
|
"loss": 0.607, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.929419525065963, |
|
"grad_norm": 11.065816879272461, |
|
"learning_rate": 9.461760782570032e-06, |
|
"loss": 0.5876, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 1.945910290237467, |
|
"grad_norm": 8.673478126525879, |
|
"learning_rate": 9.456202756780792e-06, |
|
"loss": 0.5708, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.962401055408971, |
|
"grad_norm": 8.835992813110352, |
|
"learning_rate": 9.450644730991552e-06, |
|
"loss": 0.5943, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 1.978891820580475, |
|
"grad_norm": 9.243196487426758, |
|
"learning_rate": 9.445086705202314e-06, |
|
"loss": 0.6245, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.995382585751979, |
|
"grad_norm": 7.948757171630859, |
|
"learning_rate": 9.439528679413074e-06, |
|
"loss": 0.5756, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.33147963881492615, |
|
"eval_runtime": 1586.3717, |
|
"eval_samples_per_second": 3.275, |
|
"eval_steps_per_second": 1.638, |
|
"eval_wer": 0.1471842086408254, |
|
"step": 3032 |
|
}, |
|
{ |
|
"epoch": 2.0118733509234827, |
|
"grad_norm": 9.14528751373291, |
|
"learning_rate": 9.433970653623834e-06, |
|
"loss": 0.5881, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 2.028364116094987, |
|
"grad_norm": 6.768561840057373, |
|
"learning_rate": 9.428412627834594e-06, |
|
"loss": 0.5513, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 2.044854881266491, |
|
"grad_norm": 6.91849422454834, |
|
"learning_rate": 9.422854602045354e-06, |
|
"loss": 0.5318, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.061345646437995, |
|
"grad_norm": 10.172245025634766, |
|
"learning_rate": 9.417296576256114e-06, |
|
"loss": 0.5481, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 2.0778364116094985, |
|
"grad_norm": 7.840974807739258, |
|
"learning_rate": 9.411738550466874e-06, |
|
"loss": 0.5453, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 2.0943271767810026, |
|
"grad_norm": 10.709723472595215, |
|
"learning_rate": 9.406180524677636e-06, |
|
"loss": 0.5526, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 2.1108179419525066, |
|
"grad_norm": 7.9419169425964355, |
|
"learning_rate": 9.400622498888396e-06, |
|
"loss": 0.5277, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.1273087071240107, |
|
"grad_norm": 9.7705659866333, |
|
"learning_rate": 9.395064473099156e-06, |
|
"loss": 0.5574, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 2.1437994722955147, |
|
"grad_norm": 6.695239067077637, |
|
"learning_rate": 9.389506447309916e-06, |
|
"loss": 0.5335, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.1602902374670183, |
|
"grad_norm": 6.585846900939941, |
|
"learning_rate": 9.383948421520677e-06, |
|
"loss": 0.5108, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 2.1767810026385224, |
|
"grad_norm": 7.804145336151123, |
|
"learning_rate": 9.378390395731437e-06, |
|
"loss": 0.5136, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.1932717678100264, |
|
"grad_norm": 10.289584159851074, |
|
"learning_rate": 9.372832369942197e-06, |
|
"loss": 0.5409, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 2.2097625329815305, |
|
"grad_norm": 10.98071002960205, |
|
"learning_rate": 9.367274344152957e-06, |
|
"loss": 0.5362, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 2.2262532981530345, |
|
"grad_norm": 9.558561325073242, |
|
"learning_rate": 9.361716318363719e-06, |
|
"loss": 0.5324, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 2.242744063324538, |
|
"grad_norm": 7.030877590179443, |
|
"learning_rate": 9.356158292574479e-06, |
|
"loss": 0.5889, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.259234828496042, |
|
"grad_norm": 6.8814697265625, |
|
"learning_rate": 9.350600266785239e-06, |
|
"loss": 0.523, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 2.2757255936675462, |
|
"grad_norm": 6.776719093322754, |
|
"learning_rate": 9.345042240996e-06, |
|
"loss": 0.5468, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 2.2922163588390503, |
|
"grad_norm": 6.252242565155029, |
|
"learning_rate": 9.33948421520676e-06, |
|
"loss": 0.5775, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 2.308707124010554, |
|
"grad_norm": 10.693267822265625, |
|
"learning_rate": 9.33392618941752e-06, |
|
"loss": 0.5281, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.325197889182058, |
|
"grad_norm": 8.575288772583008, |
|
"learning_rate": 9.32836816362828e-06, |
|
"loss": 0.5695, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 2.341688654353562, |
|
"grad_norm": 7.428770065307617, |
|
"learning_rate": 9.322810137839041e-06, |
|
"loss": 0.5466, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 2.358179419525066, |
|
"grad_norm": 10.00837516784668, |
|
"learning_rate": 9.317252112049801e-06, |
|
"loss": 0.5537, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 2.37467018469657, |
|
"grad_norm": 8.1272611618042, |
|
"learning_rate": 9.311694086260561e-06, |
|
"loss": 0.5876, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.3911609498680737, |
|
"grad_norm": 6.853724956512451, |
|
"learning_rate": 9.306136060471321e-06, |
|
"loss": 0.5321, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 2.4076517150395778, |
|
"grad_norm": 8.857322692871094, |
|
"learning_rate": 9.300578034682081e-06, |
|
"loss": 0.549, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 2.424142480211082, |
|
"grad_norm": 8.530628204345703, |
|
"learning_rate": 9.295020008892843e-06, |
|
"loss": 0.6112, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 2.440633245382586, |
|
"grad_norm": 7.353663444519043, |
|
"learning_rate": 9.289461983103603e-06, |
|
"loss": 0.5738, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.4571240105540895, |
|
"grad_norm": 7.334917068481445, |
|
"learning_rate": 9.283903957314363e-06, |
|
"loss": 0.5684, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 2.4736147757255935, |
|
"grad_norm": 9.152280807495117, |
|
"learning_rate": 9.278345931525123e-06, |
|
"loss": 0.5736, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.4901055408970976, |
|
"grad_norm": 6.766674041748047, |
|
"learning_rate": 9.272787905735884e-06, |
|
"loss": 0.5036, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 2.5065963060686016, |
|
"grad_norm": 9.731369972229004, |
|
"learning_rate": 9.267229879946644e-06, |
|
"loss": 0.5643, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.5230870712401057, |
|
"grad_norm": 9.269509315490723, |
|
"learning_rate": 9.261671854157403e-06, |
|
"loss": 0.5074, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 2.5395778364116097, |
|
"grad_norm": 10.63927936553955, |
|
"learning_rate": 9.256113828368165e-06, |
|
"loss": 0.5361, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.5560686015831133, |
|
"grad_norm": 7.579345226287842, |
|
"learning_rate": 9.250555802578925e-06, |
|
"loss": 0.5388, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 2.5725593667546174, |
|
"grad_norm": 8.379159927368164, |
|
"learning_rate": 9.244997776789685e-06, |
|
"loss": 0.5462, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.5890501319261214, |
|
"grad_norm": 6.8499860763549805, |
|
"learning_rate": 9.239439751000445e-06, |
|
"loss": 0.5571, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 2.6055408970976255, |
|
"grad_norm": 8.696283340454102, |
|
"learning_rate": 9.233881725211206e-06, |
|
"loss": 0.5593, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.622031662269129, |
|
"grad_norm": 8.017210960388184, |
|
"learning_rate": 9.228323699421966e-06, |
|
"loss": 0.5044, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 2.638522427440633, |
|
"grad_norm": 7.696545124053955, |
|
"learning_rate": 9.222765673632726e-06, |
|
"loss": 0.5696, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.655013192612137, |
|
"grad_norm": 8.843703269958496, |
|
"learning_rate": 9.217207647843486e-06, |
|
"loss": 0.5285, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 2.6715039577836412, |
|
"grad_norm": 9.353804588317871, |
|
"learning_rate": 9.211649622054248e-06, |
|
"loss": 0.5766, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.6879947229551453, |
|
"grad_norm": 9.517998695373535, |
|
"learning_rate": 9.206091596265008e-06, |
|
"loss": 0.5299, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 2.7044854881266494, |
|
"grad_norm": 8.601608276367188, |
|
"learning_rate": 9.200533570475768e-06, |
|
"loss": 0.5392, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.720976253298153, |
|
"grad_norm": 8.98355770111084, |
|
"learning_rate": 9.194975544686528e-06, |
|
"loss": 0.5403, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 2.737467018469657, |
|
"grad_norm": 8.31533145904541, |
|
"learning_rate": 9.189417518897288e-06, |
|
"loss": 0.5444, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.753957783641161, |
|
"grad_norm": 8.77151107788086, |
|
"learning_rate": 9.183859493108048e-06, |
|
"loss": 0.4957, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 2.7704485488126647, |
|
"grad_norm": 8.633058547973633, |
|
"learning_rate": 9.178301467318808e-06, |
|
"loss": 0.5124, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.7869393139841687, |
|
"grad_norm": 6.91944694519043, |
|
"learning_rate": 9.17274344152957e-06, |
|
"loss": 0.5006, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 2.8034300791556728, |
|
"grad_norm": 8.961416244506836, |
|
"learning_rate": 9.16718541574033e-06, |
|
"loss": 0.5394, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.819920844327177, |
|
"grad_norm": 9.132115364074707, |
|
"learning_rate": 9.16162738995109e-06, |
|
"loss": 0.4883, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 2.836411609498681, |
|
"grad_norm": 8.976722717285156, |
|
"learning_rate": 9.15606936416185e-06, |
|
"loss": 0.5483, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.852902374670185, |
|
"grad_norm": 6.239002704620361, |
|
"learning_rate": 9.15051133837261e-06, |
|
"loss": 0.5395, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 2.8693931398416885, |
|
"grad_norm": 8.597009658813477, |
|
"learning_rate": 9.144953312583372e-06, |
|
"loss": 0.5363, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.8858839050131926, |
|
"grad_norm": 8.338591575622559, |
|
"learning_rate": 9.139395286794132e-06, |
|
"loss": 0.5185, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 2.9023746701846966, |
|
"grad_norm": 7.570108890533447, |
|
"learning_rate": 9.133837261004892e-06, |
|
"loss": 0.5011, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.9188654353562007, |
|
"grad_norm": 8.193133354187012, |
|
"learning_rate": 9.128279235215652e-06, |
|
"loss": 0.5201, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 2.9353562005277043, |
|
"grad_norm": 8.535017013549805, |
|
"learning_rate": 9.122721209426413e-06, |
|
"loss": 0.5196, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 2.9518469656992083, |
|
"grad_norm": 10.10825252532959, |
|
"learning_rate": 9.117163183637173e-06, |
|
"loss": 0.4601, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 2.9683377308707124, |
|
"grad_norm": 9.440041542053223, |
|
"learning_rate": 9.111605157847933e-06, |
|
"loss": 0.5435, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.9848284960422165, |
|
"grad_norm": 9.113434791564941, |
|
"learning_rate": 9.106047132058693e-06, |
|
"loss": 0.59, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.31148383021354675, |
|
"eval_runtime": 1520.788, |
|
"eval_samples_per_second": 3.417, |
|
"eval_steps_per_second": 1.708, |
|
"eval_wer": 0.14428243891953857, |
|
"step": 4548 |
|
}, |
|
{ |
|
"epoch": 3.0013192612137205, |
|
"grad_norm": 6.392806529998779, |
|
"learning_rate": 9.100489106269453e-06, |
|
"loss": 0.5845, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 3.017810026385224, |
|
"grad_norm": 8.058794021606445, |
|
"learning_rate": 9.094931080480214e-06, |
|
"loss": 0.4345, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 3.034300791556728, |
|
"grad_norm": 7.85006856918335, |
|
"learning_rate": 9.089373054690974e-06, |
|
"loss": 0.4532, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 3.050791556728232, |
|
"grad_norm": 9.823562622070312, |
|
"learning_rate": 9.083815028901735e-06, |
|
"loss": 0.4992, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 3.0672823218997363, |
|
"grad_norm": 7.448474407196045, |
|
"learning_rate": 9.078257003112495e-06, |
|
"loss": 0.4648, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 3.0837730870712403, |
|
"grad_norm": 6.409183502197266, |
|
"learning_rate": 9.072698977323255e-06, |
|
"loss": 0.4332, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 3.100263852242744, |
|
"grad_norm": 8.675482749938965, |
|
"learning_rate": 9.067140951534015e-06, |
|
"loss": 0.4741, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 3.116754617414248, |
|
"grad_norm": 7.694202423095703, |
|
"learning_rate": 9.061582925744777e-06, |
|
"loss": 0.5007, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 3.133245382585752, |
|
"grad_norm": 8.39884090423584, |
|
"learning_rate": 9.056024899955537e-06, |
|
"loss": 0.4821, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 3.149736147757256, |
|
"grad_norm": 9.592146873474121, |
|
"learning_rate": 9.050466874166297e-06, |
|
"loss": 0.4572, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 3.16622691292876, |
|
"grad_norm": 10.090729713439941, |
|
"learning_rate": 9.044908848377057e-06, |
|
"loss": 0.5059, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 3.1827176781002637, |
|
"grad_norm": 7.96800422668457, |
|
"learning_rate": 9.039350822587819e-06, |
|
"loss": 0.4581, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 3.199208443271768, |
|
"grad_norm": 10.114542961120605, |
|
"learning_rate": 9.033792796798579e-06, |
|
"loss": 0.5104, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 3.215699208443272, |
|
"grad_norm": 7.575864791870117, |
|
"learning_rate": 9.028234771009339e-06, |
|
"loss": 0.4697, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 3.232189973614776, |
|
"grad_norm": 8.376447677612305, |
|
"learning_rate": 9.022676745220099e-06, |
|
"loss": 0.477, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 3.2486807387862795, |
|
"grad_norm": 7.300264835357666, |
|
"learning_rate": 9.017118719430859e-06, |
|
"loss": 0.4581, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 3.2651715039577835, |
|
"grad_norm": 8.129483222961426, |
|
"learning_rate": 9.011560693641619e-06, |
|
"loss": 0.5413, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 3.2816622691292876, |
|
"grad_norm": 8.351349830627441, |
|
"learning_rate": 9.006002667852379e-06, |
|
"loss": 0.5203, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 3.2981530343007917, |
|
"grad_norm": 8.101301193237305, |
|
"learning_rate": 9.000444642063139e-06, |
|
"loss": 0.4434, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.3146437994722957, |
|
"grad_norm": 8.380141258239746, |
|
"learning_rate": 8.9948866162739e-06, |
|
"loss": 0.5014, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 3.3311345646437993, |
|
"grad_norm": 6.295933723449707, |
|
"learning_rate": 8.98932859048466e-06, |
|
"loss": 0.5022, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 3.3476253298153034, |
|
"grad_norm": 8.170040130615234, |
|
"learning_rate": 8.98377056469542e-06, |
|
"loss": 0.4938, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 3.3641160949868074, |
|
"grad_norm": 8.715851783752441, |
|
"learning_rate": 8.97821253890618e-06, |
|
"loss": 0.469, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 3.3806068601583115, |
|
"grad_norm": 8.525429725646973, |
|
"learning_rate": 8.972654513116942e-06, |
|
"loss": 0.4733, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 3.397097625329815, |
|
"grad_norm": 9.116293907165527, |
|
"learning_rate": 8.967096487327702e-06, |
|
"loss": 0.4529, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 3.413588390501319, |
|
"grad_norm": 7.459149360656738, |
|
"learning_rate": 8.961538461538462e-06, |
|
"loss": 0.4919, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 3.430079155672823, |
|
"grad_norm": 12.199692726135254, |
|
"learning_rate": 8.955980435749222e-06, |
|
"loss": 0.4397, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 3.4465699208443272, |
|
"grad_norm": 5.766480922698975, |
|
"learning_rate": 8.950422409959984e-06, |
|
"loss": 0.4934, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 3.4630606860158313, |
|
"grad_norm": 8.253438949584961, |
|
"learning_rate": 8.944864384170742e-06, |
|
"loss": 0.487, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 3.4795514511873353, |
|
"grad_norm": 7.708489894866943, |
|
"learning_rate": 8.939306358381502e-06, |
|
"loss": 0.4843, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 3.496042216358839, |
|
"grad_norm": 7.353459358215332, |
|
"learning_rate": 8.933748332592264e-06, |
|
"loss": 0.4652, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 3.512532981530343, |
|
"grad_norm": 11.070839881896973, |
|
"learning_rate": 8.928190306803024e-06, |
|
"loss": 0.5014, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 3.529023746701847, |
|
"grad_norm": 6.638571262359619, |
|
"learning_rate": 8.922632281013784e-06, |
|
"loss": 0.5049, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 3.5455145118733506, |
|
"grad_norm": 5.465677261352539, |
|
"learning_rate": 8.917074255224544e-06, |
|
"loss": 0.4593, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 3.5620052770448547, |
|
"grad_norm": 6.364098072052002, |
|
"learning_rate": 8.911516229435306e-06, |
|
"loss": 0.4506, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 3.5784960422163588, |
|
"grad_norm": 6.888390064239502, |
|
"learning_rate": 8.905958203646066e-06, |
|
"loss": 0.4435, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 3.594986807387863, |
|
"grad_norm": 8.262774467468262, |
|
"learning_rate": 8.900400177856826e-06, |
|
"loss": 0.501, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 3.611477572559367, |
|
"grad_norm": 10.522833824157715, |
|
"learning_rate": 8.894842152067586e-06, |
|
"loss": 0.5003, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 3.627968337730871, |
|
"grad_norm": 9.361923217773438, |
|
"learning_rate": 8.889284126278348e-06, |
|
"loss": 0.4184, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.6444591029023745, |
|
"grad_norm": 11.22805404663086, |
|
"learning_rate": 8.883726100489108e-06, |
|
"loss": 0.4803, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 3.6609498680738786, |
|
"grad_norm": 6.513778209686279, |
|
"learning_rate": 8.878168074699868e-06, |
|
"loss": 0.5071, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 3.6774406332453826, |
|
"grad_norm": 7.134753227233887, |
|
"learning_rate": 8.872610048910628e-06, |
|
"loss": 0.4784, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 3.6939313984168867, |
|
"grad_norm": 7.034178733825684, |
|
"learning_rate": 8.867052023121388e-06, |
|
"loss": 0.4687, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 3.7104221635883903, |
|
"grad_norm": 5.566599369049072, |
|
"learning_rate": 8.861493997332148e-06, |
|
"loss": 0.5028, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 3.7269129287598943, |
|
"grad_norm": 8.106322288513184, |
|
"learning_rate": 8.855935971542908e-06, |
|
"loss": 0.4644, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 3.7434036939313984, |
|
"grad_norm": 7.244205951690674, |
|
"learning_rate": 8.85037794575367e-06, |
|
"loss": 0.4561, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 3.7598944591029024, |
|
"grad_norm": 9.410643577575684, |
|
"learning_rate": 8.84481991996443e-06, |
|
"loss": 0.4611, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 3.7763852242744065, |
|
"grad_norm": 5.8635687828063965, |
|
"learning_rate": 8.83926189417519e-06, |
|
"loss": 0.4702, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 3.7928759894459105, |
|
"grad_norm": 7.882669448852539, |
|
"learning_rate": 8.83370386838595e-06, |
|
"loss": 0.4609, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 3.809366754617414, |
|
"grad_norm": 8.896418571472168, |
|
"learning_rate": 8.82814584259671e-06, |
|
"loss": 0.4812, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 3.825857519788918, |
|
"grad_norm": 6.063759803771973, |
|
"learning_rate": 8.822587816807471e-06, |
|
"loss": 0.5068, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 3.8423482849604222, |
|
"grad_norm": 7.969833850860596, |
|
"learning_rate": 8.817029791018231e-06, |
|
"loss": 0.4726, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 3.858839050131926, |
|
"grad_norm": 7.975457191467285, |
|
"learning_rate": 8.811471765228991e-06, |
|
"loss": 0.4773, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 3.87532981530343, |
|
"grad_norm": 7.696924209594727, |
|
"learning_rate": 8.805913739439751e-06, |
|
"loss": 0.4699, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 3.891820580474934, |
|
"grad_norm": 7.673986911773682, |
|
"learning_rate": 8.800355713650513e-06, |
|
"loss": 0.4245, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 3.908311345646438, |
|
"grad_norm": 7.145058631896973, |
|
"learning_rate": 8.794797687861273e-06, |
|
"loss": 0.4492, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 3.924802110817942, |
|
"grad_norm": 6.980532646179199, |
|
"learning_rate": 8.789239662072033e-06, |
|
"loss": 0.4476, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 3.941292875989446, |
|
"grad_norm": 5.808567047119141, |
|
"learning_rate": 8.783681636282793e-06, |
|
"loss": 0.4633, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 3.9577836411609497, |
|
"grad_norm": 9.074129104614258, |
|
"learning_rate": 8.778123610493553e-06, |
|
"loss": 0.4576, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.9742744063324538, |
|
"grad_norm": 8.285755157470703, |
|
"learning_rate": 8.772565584704313e-06, |
|
"loss": 0.4938, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 3.990765171503958, |
|
"grad_norm": 7.264706611633301, |
|
"learning_rate": 8.767007558915073e-06, |
|
"loss": 0.4727, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.30439668893814087, |
|
"eval_runtime": 1406.1275, |
|
"eval_samples_per_second": 3.695, |
|
"eval_steps_per_second": 1.848, |
|
"eval_wer": 0.12903919180339615, |
|
"step": 6064 |
|
}, |
|
{ |
|
"epoch": 4.007255936675461, |
|
"grad_norm": 9.51659870147705, |
|
"learning_rate": 8.761449533125835e-06, |
|
"loss": 0.4168, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 4.0237467018469655, |
|
"grad_norm": 8.642926216125488, |
|
"learning_rate": 8.755891507336595e-06, |
|
"loss": 0.4274, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 4.0402374670184695, |
|
"grad_norm": 7.2832489013671875, |
|
"learning_rate": 8.750333481547355e-06, |
|
"loss": 0.427, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 4.056728232189974, |
|
"grad_norm": 8.79689884185791, |
|
"learning_rate": 8.744775455758115e-06, |
|
"loss": 0.4192, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 4.073218997361478, |
|
"grad_norm": 8.33678150177002, |
|
"learning_rate": 8.739217429968877e-06, |
|
"loss": 0.467, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 4.089709762532982, |
|
"grad_norm": 9.119438171386719, |
|
"learning_rate": 8.733659404179637e-06, |
|
"loss": 0.4396, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 4.106200527704486, |
|
"grad_norm": 5.395960807800293, |
|
"learning_rate": 8.728101378390397e-06, |
|
"loss": 0.4144, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 4.12269129287599, |
|
"grad_norm": 7.0732102394104, |
|
"learning_rate": 8.722543352601157e-06, |
|
"loss": 0.4201, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 4.139182058047494, |
|
"grad_norm": 7.840185165405273, |
|
"learning_rate": 8.716985326811917e-06, |
|
"loss": 0.4212, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 4.155672823218997, |
|
"grad_norm": 10.627087593078613, |
|
"learning_rate": 8.711427301022678e-06, |
|
"loss": 0.4046, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 4.172163588390501, |
|
"grad_norm": 6.872922897338867, |
|
"learning_rate": 8.705869275233437e-06, |
|
"loss": 0.4225, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 4.188654353562005, |
|
"grad_norm": 9.158402442932129, |
|
"learning_rate": 8.700311249444198e-06, |
|
"loss": 0.4105, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 4.205145118733509, |
|
"grad_norm": 6.458835124969482, |
|
"learning_rate": 8.694753223654958e-06, |
|
"loss": 0.3805, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 4.221635883905013, |
|
"grad_norm": 8.311756134033203, |
|
"learning_rate": 8.689195197865719e-06, |
|
"loss": 0.3907, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 4.238126649076517, |
|
"grad_norm": 7.731131076812744, |
|
"learning_rate": 8.683637172076479e-06, |
|
"loss": 0.4104, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 4.254617414248021, |
|
"grad_norm": 8.254470825195312, |
|
"learning_rate": 8.678079146287239e-06, |
|
"loss": 0.4037, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 4.271108179419525, |
|
"grad_norm": 6.898271560668945, |
|
"learning_rate": 8.672521120498e-06, |
|
"loss": 0.4715, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 4.287598944591029, |
|
"grad_norm": 6.904751777648926, |
|
"learning_rate": 8.66696309470876e-06, |
|
"loss": 0.4213, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 4.304089709762533, |
|
"grad_norm": 8.848785400390625, |
|
"learning_rate": 8.66140506891952e-06, |
|
"loss": 0.406, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 4.320580474934037, |
|
"grad_norm": 8.48416805267334, |
|
"learning_rate": 8.65584704313028e-06, |
|
"loss": 0.4219, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 4.337071240105541, |
|
"grad_norm": 10.504020690917969, |
|
"learning_rate": 8.650289017341042e-06, |
|
"loss": 0.4611, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 4.353562005277045, |
|
"grad_norm": 9.928447723388672, |
|
"learning_rate": 8.644730991551802e-06, |
|
"loss": 0.4566, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 4.370052770448549, |
|
"grad_norm": 8.590391159057617, |
|
"learning_rate": 8.639172965762562e-06, |
|
"loss": 0.4143, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 4.386543535620053, |
|
"grad_norm": 9.111311912536621, |
|
"learning_rate": 8.633614939973322e-06, |
|
"loss": 0.4046, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 4.403034300791557, |
|
"grad_norm": 8.099916458129883, |
|
"learning_rate": 8.628056914184082e-06, |
|
"loss": 0.4309, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 4.419525065963061, |
|
"grad_norm": 8.059656143188477, |
|
"learning_rate": 8.622498888394842e-06, |
|
"loss": 0.4764, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 4.436015831134565, |
|
"grad_norm": 7.070877552032471, |
|
"learning_rate": 8.616940862605602e-06, |
|
"loss": 0.4682, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 4.452506596306069, |
|
"grad_norm": 10.665205955505371, |
|
"learning_rate": 8.611382836816364e-06, |
|
"loss": 0.4091, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 4.468997361477572, |
|
"grad_norm": 7.845228672027588, |
|
"learning_rate": 8.605824811027124e-06, |
|
"loss": 0.4549, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 4.485488126649076, |
|
"grad_norm": 8.280657768249512, |
|
"learning_rate": 8.600266785237884e-06, |
|
"loss": 0.4166, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 4.50197889182058, |
|
"grad_norm": 9.942441940307617, |
|
"learning_rate": 8.594708759448644e-06, |
|
"loss": 0.4164, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 4.518469656992084, |
|
"grad_norm": 7.303974151611328, |
|
"learning_rate": 8.589150733659406e-06, |
|
"loss": 0.4455, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 4.534960422163588, |
|
"grad_norm": 9.158472061157227, |
|
"learning_rate": 8.583592707870166e-06, |
|
"loss": 0.4324, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 4.5514511873350925, |
|
"grad_norm": 8.51202392578125, |
|
"learning_rate": 8.578034682080926e-06, |
|
"loss": 0.4393, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 4.5679419525065965, |
|
"grad_norm": 8.030768394470215, |
|
"learning_rate": 8.572476656291686e-06, |
|
"loss": 0.4463, |
|
"step": 6925 |
|
}, |
|
{ |
|
"epoch": 4.584432717678101, |
|
"grad_norm": 9.200541496276855, |
|
"learning_rate": 8.566918630502447e-06, |
|
"loss": 0.4654, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 4.600923482849604, |
|
"grad_norm": 8.542319297790527, |
|
"learning_rate": 8.561360604713207e-06, |
|
"loss": 0.4101, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 4.617414248021108, |
|
"grad_norm": 7.467373847961426, |
|
"learning_rate": 8.555802578923967e-06, |
|
"loss": 0.4343, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 4.633905013192612, |
|
"grad_norm": 6.186234474182129, |
|
"learning_rate": 8.550244553134727e-06, |
|
"loss": 0.4718, |
|
"step": 7025 |
|
}, |
|
{ |
|
"epoch": 4.650395778364116, |
|
"grad_norm": 9.750950813293457, |
|
"learning_rate": 8.544686527345487e-06, |
|
"loss": 0.4141, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 4.66688654353562, |
|
"grad_norm": 7.7578253746032715, |
|
"learning_rate": 8.539128501556247e-06, |
|
"loss": 0.4167, |
|
"step": 7075 |
|
}, |
|
{ |
|
"epoch": 4.683377308707124, |
|
"grad_norm": 5.445390224456787, |
|
"learning_rate": 8.533570475767007e-06, |
|
"loss": 0.4324, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 4.699868073878628, |
|
"grad_norm": 9.907746315002441, |
|
"learning_rate": 8.52801244997777e-06, |
|
"loss": 0.4258, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 4.716358839050132, |
|
"grad_norm": 8.552475929260254, |
|
"learning_rate": 8.52245442418853e-06, |
|
"loss": 0.4233, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 4.732849604221636, |
|
"grad_norm": 8.171768188476562, |
|
"learning_rate": 8.51689639839929e-06, |
|
"loss": 0.3845, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 4.74934036939314, |
|
"grad_norm": 9.57198429107666, |
|
"learning_rate": 8.51133837261005e-06, |
|
"loss": 0.4482, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 4.765831134564644, |
|
"grad_norm": 8.749096870422363, |
|
"learning_rate": 8.50578034682081e-06, |
|
"loss": 0.4512, |
|
"step": 7225 |
|
}, |
|
{ |
|
"epoch": 4.782321899736147, |
|
"grad_norm": 6.986504554748535, |
|
"learning_rate": 8.500222321031571e-06, |
|
"loss": 0.4268, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 4.7988126649076515, |
|
"grad_norm": 7.843720436096191, |
|
"learning_rate": 8.494664295242331e-06, |
|
"loss": 0.4414, |
|
"step": 7275 |
|
}, |
|
{ |
|
"epoch": 4.8153034300791555, |
|
"grad_norm": 7.746464252471924, |
|
"learning_rate": 8.489106269453091e-06, |
|
"loss": 0.3829, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 4.83179419525066, |
|
"grad_norm": 6.705915451049805, |
|
"learning_rate": 8.483548243663851e-06, |
|
"loss": 0.3767, |
|
"step": 7325 |
|
}, |
|
{ |
|
"epoch": 4.848284960422164, |
|
"grad_norm": 6.143075466156006, |
|
"learning_rate": 8.477990217874613e-06, |
|
"loss": 0.4318, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 4.864775725593668, |
|
"grad_norm": 9.569985389709473, |
|
"learning_rate": 8.472432192085373e-06, |
|
"loss": 0.4184, |
|
"step": 7375 |
|
}, |
|
{ |
|
"epoch": 4.881266490765172, |
|
"grad_norm": 6.865624904632568, |
|
"learning_rate": 8.466874166296131e-06, |
|
"loss": 0.3741, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 4.897757255936676, |
|
"grad_norm": 8.196345329284668, |
|
"learning_rate": 8.461316140506893e-06, |
|
"loss": 0.4535, |
|
"step": 7425 |
|
}, |
|
{ |
|
"epoch": 4.914248021108179, |
|
"grad_norm": 6.617861747741699, |
|
"learning_rate": 8.455758114717653e-06, |
|
"loss": 0.3955, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 4.930738786279683, |
|
"grad_norm": 6.829433441162109, |
|
"learning_rate": 8.450200088928413e-06, |
|
"loss": 0.3856, |
|
"step": 7475 |
|
}, |
|
{ |
|
"epoch": 4.947229551451187, |
|
"grad_norm": 7.319567680358887, |
|
"learning_rate": 8.444642063139173e-06, |
|
"loss": 0.438, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 4.963720316622691, |
|
"grad_norm": 7.047403812408447, |
|
"learning_rate": 8.439084037349935e-06, |
|
"loss": 0.4061, |
|
"step": 7525 |
|
}, |
|
{ |
|
"epoch": 4.980211081794195, |
|
"grad_norm": 9.969358444213867, |
|
"learning_rate": 8.433526011560695e-06, |
|
"loss": 0.392, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 4.996701846965699, |
|
"grad_norm": 8.516345977783203, |
|
"learning_rate": 8.427967985771455e-06, |
|
"loss": 0.4134, |
|
"step": 7575 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.3021000921726227, |
|
"eval_runtime": 1273.4915, |
|
"eval_samples_per_second": 4.08, |
|
"eval_steps_per_second": 2.04, |
|
"eval_wer": 0.12830479329368777, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 5.013192612137203, |
|
"grad_norm": 7.070035457611084, |
|
"learning_rate": 8.422409959982215e-06, |
|
"loss": 0.3749, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 5.029683377308707, |
|
"grad_norm": 6.715669631958008, |
|
"learning_rate": 8.416851934192976e-06, |
|
"loss": 0.3547, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 5.046174142480211, |
|
"grad_norm": 6.825494766235352, |
|
"learning_rate": 8.411293908403736e-06, |
|
"loss": 0.3513, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 5.062664907651715, |
|
"grad_norm": 9.53160572052002, |
|
"learning_rate": 8.405735882614496e-06, |
|
"loss": 0.3648, |
|
"step": 7675 |
|
}, |
|
{ |
|
"epoch": 5.0791556728232194, |
|
"grad_norm": 5.551097393035889, |
|
"learning_rate": 8.400177856825256e-06, |
|
"loss": 0.3457, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 5.095646437994723, |
|
"grad_norm": 8.277714729309082, |
|
"learning_rate": 8.394619831036016e-06, |
|
"loss": 0.3598, |
|
"step": 7725 |
|
}, |
|
{ |
|
"epoch": 5.112137203166227, |
|
"grad_norm": 7.182380199432373, |
|
"learning_rate": 8.389061805246776e-06, |
|
"loss": 0.3625, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 5.128627968337731, |
|
"grad_norm": 7.097862243652344, |
|
"learning_rate": 8.383503779457536e-06, |
|
"loss": 0.4117, |
|
"step": 7775 |
|
}, |
|
{ |
|
"epoch": 5.145118733509235, |
|
"grad_norm": 5.849909782409668, |
|
"learning_rate": 8.377945753668298e-06, |
|
"loss": 0.3609, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 5.161609498680739, |
|
"grad_norm": 7.363701343536377, |
|
"learning_rate": 8.372387727879058e-06, |
|
"loss": 0.3826, |
|
"step": 7825 |
|
}, |
|
{ |
|
"epoch": 5.178100263852243, |
|
"grad_norm": 6.545772075653076, |
|
"learning_rate": 8.366829702089818e-06, |
|
"loss": 0.3679, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 5.194591029023747, |
|
"grad_norm": 11.929193496704102, |
|
"learning_rate": 8.361271676300578e-06, |
|
"loss": 0.3753, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 5.211081794195251, |
|
"grad_norm": 6.836696624755859, |
|
"learning_rate": 8.355713650511338e-06, |
|
"loss": 0.3905, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 5.227572559366755, |
|
"grad_norm": 9.196709632873535, |
|
"learning_rate": 8.3501556247221e-06, |
|
"loss": 0.364, |
|
"step": 7925 |
|
}, |
|
{ |
|
"epoch": 5.244063324538258, |
|
"grad_norm": 8.637734413146973, |
|
"learning_rate": 8.34459759893286e-06, |
|
"loss": 0.3797, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 5.260554089709762, |
|
"grad_norm": 9.510890007019043, |
|
"learning_rate": 8.33903957314362e-06, |
|
"loss": 0.3592, |
|
"step": 7975 |
|
}, |
|
{ |
|
"epoch": 5.277044854881266, |
|
"grad_norm": 5.116422176361084, |
|
"learning_rate": 8.33348154735438e-06, |
|
"loss": 0.4121, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 5.29353562005277, |
|
"grad_norm": 5.724400997161865, |
|
"learning_rate": 8.327923521565142e-06, |
|
"loss": 0.3679, |
|
"step": 8025 |
|
}, |
|
{ |
|
"epoch": 5.310026385224274, |
|
"grad_norm": 6.654912948608398, |
|
"learning_rate": 8.322365495775902e-06, |
|
"loss": 0.3784, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 5.326517150395778, |
|
"grad_norm": 6.645880222320557, |
|
"learning_rate": 8.316807469986662e-06, |
|
"loss": 0.3809, |
|
"step": 8075 |
|
}, |
|
{ |
|
"epoch": 5.3430079155672825, |
|
"grad_norm": 7.6851091384887695, |
|
"learning_rate": 8.311249444197422e-06, |
|
"loss": 0.3949, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 5.3594986807387865, |
|
"grad_norm": 6.103472709655762, |
|
"learning_rate": 8.305691418408182e-06, |
|
"loss": 0.3656, |
|
"step": 8125 |
|
}, |
|
{ |
|
"epoch": 5.375989445910291, |
|
"grad_norm": 7.165574073791504, |
|
"learning_rate": 8.300133392618942e-06, |
|
"loss": 0.3331, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 5.392480211081795, |
|
"grad_norm": 5.427399635314941, |
|
"learning_rate": 8.294575366829702e-06, |
|
"loss": 0.349, |
|
"step": 8175 |
|
}, |
|
{ |
|
"epoch": 5.408970976253298, |
|
"grad_norm": 7.878599643707275, |
|
"learning_rate": 8.289017341040463e-06, |
|
"loss": 0.3861, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 5.425461741424802, |
|
"grad_norm": 9.836017608642578, |
|
"learning_rate": 8.283459315251223e-06, |
|
"loss": 0.4138, |
|
"step": 8225 |
|
}, |
|
{ |
|
"epoch": 5.441952506596306, |
|
"grad_norm": 7.767301559448242, |
|
"learning_rate": 8.277901289461984e-06, |
|
"loss": 0.4012, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 5.45844327176781, |
|
"grad_norm": 8.52707576751709, |
|
"learning_rate": 8.272343263672744e-06, |
|
"loss": 0.3942, |
|
"step": 8275 |
|
}, |
|
{ |
|
"epoch": 5.474934036939314, |
|
"grad_norm": 8.378703117370605, |
|
"learning_rate": 8.266785237883505e-06, |
|
"loss": 0.3718, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 5.491424802110818, |
|
"grad_norm": 7.213738918304443, |
|
"learning_rate": 8.261227212094265e-06, |
|
"loss": 0.3959, |
|
"step": 8325 |
|
}, |
|
{ |
|
"epoch": 5.507915567282322, |
|
"grad_norm": 8.569686889648438, |
|
"learning_rate": 8.255669186305025e-06, |
|
"loss": 0.4553, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 5.524406332453826, |
|
"grad_norm": 8.710777282714844, |
|
"learning_rate": 8.250111160515785e-06, |
|
"loss": 0.3511, |
|
"step": 8375 |
|
}, |
|
{ |
|
"epoch": 5.540897097625329, |
|
"grad_norm": 8.045042037963867, |
|
"learning_rate": 8.244553134726547e-06, |
|
"loss": 0.3977, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 5.557387862796833, |
|
"grad_norm": 6.2842254638671875, |
|
"learning_rate": 8.238995108937307e-06, |
|
"loss": 0.374, |
|
"step": 8425 |
|
}, |
|
{ |
|
"epoch": 5.573878627968337, |
|
"grad_norm": 8.507267951965332, |
|
"learning_rate": 8.233437083148067e-06, |
|
"loss": 0.4065, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 5.5903693931398415, |
|
"grad_norm": 7.953192234039307, |
|
"learning_rate": 8.227879057358827e-06, |
|
"loss": 0.4024, |
|
"step": 8475 |
|
}, |
|
{ |
|
"epoch": 5.6068601583113455, |
|
"grad_norm": 5.938470840454102, |
|
"learning_rate": 8.222321031569587e-06, |
|
"loss": 0.369, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 5.62335092348285, |
|
"grad_norm": 8.270731925964355, |
|
"learning_rate": 8.216763005780347e-06, |
|
"loss": 0.3962, |
|
"step": 8525 |
|
}, |
|
{ |
|
"epoch": 5.639841688654354, |
|
"grad_norm": 8.280898094177246, |
|
"learning_rate": 8.211204979991107e-06, |
|
"loss": 0.3711, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 5.656332453825858, |
|
"grad_norm": 8.00169849395752, |
|
"learning_rate": 8.205646954201867e-06, |
|
"loss": 0.4316, |
|
"step": 8575 |
|
}, |
|
{ |
|
"epoch": 5.672823218997362, |
|
"grad_norm": 6.38738489151001, |
|
"learning_rate": 8.200088928412629e-06, |
|
"loss": 0.409, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 5.689313984168866, |
|
"grad_norm": 8.060171127319336, |
|
"learning_rate": 8.194753223654958e-06, |
|
"loss": 0.3912, |
|
"step": 8625 |
|
}, |
|
{ |
|
"epoch": 5.70580474934037, |
|
"grad_norm": 7.570400238037109, |
|
"learning_rate": 8.18919519786572e-06, |
|
"loss": 0.4043, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 5.722295514511873, |
|
"grad_norm": 6.873922824859619, |
|
"learning_rate": 8.18363717207648e-06, |
|
"loss": 0.3764, |
|
"step": 8675 |
|
}, |
|
{ |
|
"epoch": 5.738786279683377, |
|
"grad_norm": 7.591399669647217, |
|
"learning_rate": 8.17807914628724e-06, |
|
"loss": 0.3745, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 5.755277044854881, |
|
"grad_norm": 8.414856910705566, |
|
"learning_rate": 8.172521120498e-06, |
|
"loss": 0.4268, |
|
"step": 8725 |
|
}, |
|
{ |
|
"epoch": 5.771767810026385, |
|
"grad_norm": 9.1331148147583, |
|
"learning_rate": 8.16696309470876e-06, |
|
"loss": 0.3676, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 5.788258575197889, |
|
"grad_norm": 6.92949104309082, |
|
"learning_rate": 8.16140506891952e-06, |
|
"loss": 0.3932, |
|
"step": 8775 |
|
}, |
|
{ |
|
"epoch": 5.804749340369393, |
|
"grad_norm": 6.618691444396973, |
|
"learning_rate": 8.15584704313028e-06, |
|
"loss": 0.371, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 5.821240105540897, |
|
"grad_norm": 8.719780921936035, |
|
"learning_rate": 8.150289017341042e-06, |
|
"loss": 0.4156, |
|
"step": 8825 |
|
}, |
|
{ |
|
"epoch": 5.837730870712401, |
|
"grad_norm": 4.5113525390625, |
|
"learning_rate": 8.144730991551802e-06, |
|
"loss": 0.4222, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 5.8542216358839045, |
|
"grad_norm": 7.771907329559326, |
|
"learning_rate": 8.139172965762562e-06, |
|
"loss": 0.3476, |
|
"step": 8875 |
|
}, |
|
{ |
|
"epoch": 5.870712401055409, |
|
"grad_norm": 8.649697303771973, |
|
"learning_rate": 8.133614939973322e-06, |
|
"loss": 0.3834, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 5.887203166226913, |
|
"grad_norm": 11.093376159667969, |
|
"learning_rate": 8.128056914184082e-06, |
|
"loss": 0.3735, |
|
"step": 8925 |
|
}, |
|
{ |
|
"epoch": 5.903693931398417, |
|
"grad_norm": 7.354765892028809, |
|
"learning_rate": 8.122498888394843e-06, |
|
"loss": 0.3949, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 5.920184696569921, |
|
"grad_norm": 8.032320022583008, |
|
"learning_rate": 8.116940862605604e-06, |
|
"loss": 0.3853, |
|
"step": 8975 |
|
}, |
|
{ |
|
"epoch": 5.936675461741425, |
|
"grad_norm": 7.220771312713623, |
|
"learning_rate": 8.111382836816364e-06, |
|
"loss": 0.3608, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 5.953166226912929, |
|
"grad_norm": 7.930024147033691, |
|
"learning_rate": 8.105824811027124e-06, |
|
"loss": 0.3715, |
|
"step": 9025 |
|
}, |
|
{ |
|
"epoch": 5.969656992084433, |
|
"grad_norm": 10.353165626525879, |
|
"learning_rate": 8.100266785237885e-06, |
|
"loss": 0.3852, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 5.986147757255937, |
|
"grad_norm": 7.248688220977783, |
|
"learning_rate": 8.094708759448644e-06, |
|
"loss": 0.3542, |
|
"step": 9075 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.3076770603656769, |
|
"eval_runtime": 1501.2001, |
|
"eval_samples_per_second": 3.461, |
|
"eval_steps_per_second": 1.731, |
|
"eval_wer": 0.12749874614888587, |
|
"step": 9096 |
|
}, |
|
{ |
|
"epoch": 6.002638522427441, |
|
"grad_norm": 6.4646382331848145, |
|
"learning_rate": 8.089150733659404e-06, |
|
"loss": 0.3508, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 6.019129287598944, |
|
"grad_norm": 8.597702980041504, |
|
"learning_rate": 8.083592707870165e-06, |
|
"loss": 0.3326, |
|
"step": 9125 |
|
}, |
|
{ |
|
"epoch": 6.035620052770448, |
|
"grad_norm": 8.334925651550293, |
|
"learning_rate": 8.078034682080925e-06, |
|
"loss": 0.3246, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 6.052110817941952, |
|
"grad_norm": 5.674637794494629, |
|
"learning_rate": 8.072476656291685e-06, |
|
"loss": 0.3472, |
|
"step": 9175 |
|
}, |
|
{ |
|
"epoch": 6.068601583113456, |
|
"grad_norm": 4.868644714355469, |
|
"learning_rate": 8.066918630502445e-06, |
|
"loss": 0.3483, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 6.08509234828496, |
|
"grad_norm": 7.509459972381592, |
|
"learning_rate": 8.061360604713207e-06, |
|
"loss": 0.3372, |
|
"step": 9225 |
|
}, |
|
{ |
|
"epoch": 6.101583113456464, |
|
"grad_norm": 6.369452953338623, |
|
"learning_rate": 8.055802578923967e-06, |
|
"loss": 0.3294, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 6.1180738786279685, |
|
"grad_norm": 6.736758232116699, |
|
"learning_rate": 8.050244553134727e-06, |
|
"loss": 0.317, |
|
"step": 9275 |
|
}, |
|
{ |
|
"epoch": 6.1345646437994725, |
|
"grad_norm": 5.772039890289307, |
|
"learning_rate": 8.044686527345487e-06, |
|
"loss": 0.3138, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 6.151055408970977, |
|
"grad_norm": 8.263904571533203, |
|
"learning_rate": 8.039128501556249e-06, |
|
"loss": 0.3507, |
|
"step": 9325 |
|
}, |
|
{ |
|
"epoch": 6.167546174142481, |
|
"grad_norm": 7.171966552734375, |
|
"learning_rate": 8.033570475767009e-06, |
|
"loss": 0.3343, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 6.184036939313984, |
|
"grad_norm": 7.375022888183594, |
|
"learning_rate": 8.028012449977769e-06, |
|
"loss": 0.328, |
|
"step": 9375 |
|
}, |
|
{ |
|
"epoch": 6.200527704485488, |
|
"grad_norm": 7.487870693206787, |
|
"learning_rate": 8.022454424188529e-06, |
|
"loss": 0.3433, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 6.217018469656992, |
|
"grad_norm": 7.687648296356201, |
|
"learning_rate": 8.016896398399289e-06, |
|
"loss": 0.3625, |
|
"step": 9425 |
|
}, |
|
{ |
|
"epoch": 6.233509234828496, |
|
"grad_norm": 8.015031814575195, |
|
"learning_rate": 8.011338372610049e-06, |
|
"loss": 0.3585, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"grad_norm": 6.240160942077637, |
|
"learning_rate": 8.005780346820809e-06, |
|
"loss": 0.3272, |
|
"step": 9475 |
|
}, |
|
{ |
|
"epoch": 6.266490765171504, |
|
"grad_norm": 5.806061744689941, |
|
"learning_rate": 8.00022232103157e-06, |
|
"loss": 0.3368, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 6.282981530343008, |
|
"grad_norm": 5.972324848175049, |
|
"learning_rate": 7.99466429524233e-06, |
|
"loss": 0.305, |
|
"step": 9525 |
|
}, |
|
{ |
|
"epoch": 6.299472295514512, |
|
"grad_norm": 7.271647930145264, |
|
"learning_rate": 7.98910626945309e-06, |
|
"loss": 0.3777, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 6.315963060686016, |
|
"grad_norm": 8.508190155029297, |
|
"learning_rate": 7.98354824366385e-06, |
|
"loss": 0.3894, |
|
"step": 9575 |
|
}, |
|
{ |
|
"epoch": 6.33245382585752, |
|
"grad_norm": 7.478120803833008, |
|
"learning_rate": 7.977990217874612e-06, |
|
"loss": 0.3469, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 6.348944591029023, |
|
"grad_norm": 7.627612113952637, |
|
"learning_rate": 7.972432192085372e-06, |
|
"loss": 0.3106, |
|
"step": 9625 |
|
}, |
|
{ |
|
"epoch": 6.3654353562005275, |
|
"grad_norm": 6.731842041015625, |
|
"learning_rate": 7.966874166296132e-06, |
|
"loss": 0.3331, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 6.3819261213720315, |
|
"grad_norm": 5.524582862854004, |
|
"learning_rate": 7.961316140506892e-06, |
|
"loss": 0.3364, |
|
"step": 9675 |
|
}, |
|
{ |
|
"epoch": 6.398416886543536, |
|
"grad_norm": 5.552459239959717, |
|
"learning_rate": 7.955758114717652e-06, |
|
"loss": 0.3239, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 6.41490765171504, |
|
"grad_norm": 8.744638442993164, |
|
"learning_rate": 7.950200088928414e-06, |
|
"loss": 0.3518, |
|
"step": 9725 |
|
}, |
|
{ |
|
"epoch": 6.431398416886544, |
|
"grad_norm": 6.690433502197266, |
|
"learning_rate": 7.944642063139174e-06, |
|
"loss": 0.323, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 6.447889182058048, |
|
"grad_norm": 6.405791282653809, |
|
"learning_rate": 7.939084037349934e-06, |
|
"loss": 0.3255, |
|
"step": 9775 |
|
}, |
|
{ |
|
"epoch": 6.464379947229552, |
|
"grad_norm": 8.274345397949219, |
|
"learning_rate": 7.933526011560694e-06, |
|
"loss": 0.3573, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 6.480870712401055, |
|
"grad_norm": 10.02904987335205, |
|
"learning_rate": 7.927967985771454e-06, |
|
"loss": 0.3627, |
|
"step": 9825 |
|
}, |
|
{ |
|
"epoch": 6.497361477572559, |
|
"grad_norm": 8.587586402893066, |
|
"learning_rate": 7.922409959982214e-06, |
|
"loss": 0.3399, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 6.513852242744063, |
|
"grad_norm": 8.427138328552246, |
|
"learning_rate": 7.916851934192974e-06, |
|
"loss": 0.3729, |
|
"step": 9875 |
|
}, |
|
{ |
|
"epoch": 6.530343007915567, |
|
"grad_norm": 11.161705017089844, |
|
"learning_rate": 7.911293908403736e-06, |
|
"loss": 0.3749, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 6.546833773087071, |
|
"grad_norm": 9.07302474975586, |
|
"learning_rate": 7.905735882614496e-06, |
|
"loss": 0.3293, |
|
"step": 9925 |
|
}, |
|
{ |
|
"epoch": 6.563324538258575, |
|
"grad_norm": 5.79204797744751, |
|
"learning_rate": 7.900177856825256e-06, |
|
"loss": 0.352, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 6.579815303430079, |
|
"grad_norm": 8.09050178527832, |
|
"learning_rate": 7.894619831036016e-06, |
|
"loss": 0.3466, |
|
"step": 9975 |
|
}, |
|
{ |
|
"epoch": 6.596306068601583, |
|
"grad_norm": 6.807469844818115, |
|
"learning_rate": 7.889061805246778e-06, |
|
"loss": 0.3827, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 6.612796833773087, |
|
"grad_norm": 5.356978416442871, |
|
"learning_rate": 7.883503779457538e-06, |
|
"loss": 0.3486, |
|
"step": 10025 |
|
}, |
|
{ |
|
"epoch": 6.629287598944591, |
|
"grad_norm": 5.796497821807861, |
|
"learning_rate": 7.877945753668298e-06, |
|
"loss": 0.3431, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 6.6457783641160955, |
|
"grad_norm": 5.685765743255615, |
|
"learning_rate": 7.872387727879058e-06, |
|
"loss": 0.3423, |
|
"step": 10075 |
|
}, |
|
{ |
|
"epoch": 6.662269129287599, |
|
"grad_norm": null, |
|
"learning_rate": 7.867052023121387e-06, |
|
"loss": 0.3711, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 6.678759894459103, |
|
"grad_norm": 8.378765106201172, |
|
"learning_rate": 7.861493997332147e-06, |
|
"loss": 0.3662, |
|
"step": 10125 |
|
}, |
|
{ |
|
"epoch": 6.695250659630607, |
|
"grad_norm": 9.546087265014648, |
|
"learning_rate": 7.855935971542909e-06, |
|
"loss": 0.3005, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 6.711741424802111, |
|
"grad_norm": 7.322122097015381, |
|
"learning_rate": 7.850377945753669e-06, |
|
"loss": 0.3464, |
|
"step": 10175 |
|
}, |
|
{ |
|
"epoch": 6.728232189973615, |
|
"grad_norm": 8.071023941040039, |
|
"learning_rate": 7.844819919964429e-06, |
|
"loss": 0.3444, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 6.744722955145119, |
|
"grad_norm": 8.991476058959961, |
|
"learning_rate": 7.839261894175189e-06, |
|
"loss": 0.3779, |
|
"step": 10225 |
|
}, |
|
{ |
|
"epoch": 6.761213720316623, |
|
"grad_norm": 7.23716402053833, |
|
"learning_rate": 7.83370386838595e-06, |
|
"loss": 0.3075, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 6.777704485488127, |
|
"grad_norm": 5.675558567047119, |
|
"learning_rate": 7.82814584259671e-06, |
|
"loss": 0.3185, |
|
"step": 10275 |
|
}, |
|
{ |
|
"epoch": 6.79419525065963, |
|
"grad_norm": 9.553876876831055, |
|
"learning_rate": 7.82258781680747e-06, |
|
"loss": 0.3661, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 6.810686015831134, |
|
"grad_norm": 5.838205814361572, |
|
"learning_rate": 7.81702979101823e-06, |
|
"loss": 0.3378, |
|
"step": 10325 |
|
}, |
|
{ |
|
"epoch": 6.827176781002638, |
|
"grad_norm": 7.847379207611084, |
|
"learning_rate": 7.811471765228992e-06, |
|
"loss": 0.3443, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 6.843667546174142, |
|
"grad_norm": 7.678672790527344, |
|
"learning_rate": 7.805913739439752e-06, |
|
"loss": 0.3265, |
|
"step": 10375 |
|
}, |
|
{ |
|
"epoch": 6.860158311345646, |
|
"grad_norm": 7.874334335327148, |
|
"learning_rate": 7.80035571365051e-06, |
|
"loss": 0.3456, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 6.87664907651715, |
|
"grad_norm": 7.230392932891846, |
|
"learning_rate": 7.794797687861272e-06, |
|
"loss": 0.3702, |
|
"step": 10425 |
|
}, |
|
{ |
|
"epoch": 6.8931398416886545, |
|
"grad_norm": 7.550044536590576, |
|
"learning_rate": 7.789239662072033e-06, |
|
"loss": 0.3428, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 6.9096306068601585, |
|
"grad_norm": 5.714991569519043, |
|
"learning_rate": 7.783681636282793e-06, |
|
"loss": 0.3442, |
|
"step": 10475 |
|
}, |
|
{ |
|
"epoch": 6.926121372031663, |
|
"grad_norm": 9.231269836425781, |
|
"learning_rate": 7.778123610493553e-06, |
|
"loss": 0.3805, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 6.942612137203167, |
|
"grad_norm": 8.083724021911621, |
|
"learning_rate": 7.772565584704314e-06, |
|
"loss": 0.3497, |
|
"step": 10525 |
|
}, |
|
{ |
|
"epoch": 6.959102902374671, |
|
"grad_norm": 5.767257213592529, |
|
"learning_rate": 7.767007558915074e-06, |
|
"loss": 0.3101, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 6.975593667546174, |
|
"grad_norm": 6.558534622192383, |
|
"learning_rate": 7.761449533125834e-06, |
|
"loss": 0.3563, |
|
"step": 10575 |
|
}, |
|
{ |
|
"epoch": 6.992084432717678, |
|
"grad_norm": 8.02295207977295, |
|
"learning_rate": 7.755891507336594e-06, |
|
"loss": 0.3648, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.30907538533210754, |
|
"eval_runtime": 1524.829, |
|
"eval_samples_per_second": 3.408, |
|
"eval_steps_per_second": 1.704, |
|
"eval_wer": 0.12174894318263237, |
|
"step": 10612 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 45480, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.897929131311104e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|