{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.7056277056277054,
"eval_steps": 1000,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.013528138528138528,
"grad_norm": 18.23776626586914,
"learning_rate": 4.800000000000001e-07,
"loss": 1.7881,
"step": 25
},
{
"epoch": 0.027056277056277056,
"grad_norm": 11.490796089172363,
"learning_rate": 9.800000000000001e-07,
"loss": 1.4477,
"step": 50
},
{
"epoch": 0.040584415584415584,
"grad_norm": 10.601150512695312,
"learning_rate": 1.48e-06,
"loss": 1.0989,
"step": 75
},
{
"epoch": 0.05411255411255411,
"grad_norm": 12.294251441955566,
"learning_rate": 1.98e-06,
"loss": 0.9367,
"step": 100
},
{
"epoch": 0.06764069264069264,
"grad_norm": 11.920494079589844,
"learning_rate": 2.4800000000000004e-06,
"loss": 0.8698,
"step": 125
},
{
"epoch": 0.08116883116883117,
"grad_norm": 11.758705139160156,
"learning_rate": 2.9800000000000003e-06,
"loss": 0.8531,
"step": 150
},
{
"epoch": 0.0946969696969697,
"grad_norm": 11.037556648254395,
"learning_rate": 3.48e-06,
"loss": 0.812,
"step": 175
},
{
"epoch": 0.10822510822510822,
"grad_norm": 10.065262794494629,
"learning_rate": 3.980000000000001e-06,
"loss": 0.7987,
"step": 200
},
{
"epoch": 0.12175324675324675,
"grad_norm": 9.124336242675781,
"learning_rate": 4.48e-06,
"loss": 0.7455,
"step": 225
},
{
"epoch": 0.13528138528138528,
"grad_norm": 10.971399307250977,
"learning_rate": 4.980000000000001e-06,
"loss": 0.7564,
"step": 250
},
{
"epoch": 0.1488095238095238,
"grad_norm": 9.226423263549805,
"learning_rate": 5.480000000000001e-06,
"loss": 0.7163,
"step": 275
},
{
"epoch": 0.16233766233766234,
"grad_norm": 9.523015022277832,
"learning_rate": 5.98e-06,
"loss": 0.7341,
"step": 300
},
{
"epoch": 0.17586580086580086,
"grad_norm": 8.390256881713867,
"learning_rate": 6.480000000000001e-06,
"loss": 0.7301,
"step": 325
},
{
"epoch": 0.1893939393939394,
"grad_norm": 9.996743202209473,
"learning_rate": 6.98e-06,
"loss": 0.6897,
"step": 350
},
{
"epoch": 0.20292207792207792,
"grad_norm": 9.470787048339844,
"learning_rate": 7.48e-06,
"loss": 0.6728,
"step": 375
},
{
"epoch": 0.21645021645021645,
"grad_norm": 8.221435546875,
"learning_rate": 7.980000000000002e-06,
"loss": 0.6853,
"step": 400
},
{
"epoch": 0.22997835497835498,
"grad_norm": 9.243407249450684,
"learning_rate": 8.48e-06,
"loss": 0.6899,
"step": 425
},
{
"epoch": 0.2435064935064935,
"grad_norm": 8.308032989501953,
"learning_rate": 8.96e-06,
"loss": 0.634,
"step": 450
},
{
"epoch": 0.25703463203463206,
"grad_norm": 8.970362663269043,
"learning_rate": 9.460000000000001e-06,
"loss": 0.6191,
"step": 475
},
{
"epoch": 0.27056277056277056,
"grad_norm": 9.167222023010254,
"learning_rate": 9.960000000000001e-06,
"loss": 0.6348,
"step": 500
},
{
"epoch": 0.2840909090909091,
"grad_norm": 8.824662208557129,
"learning_rate": 9.94888888888889e-06,
"loss": 0.6228,
"step": 525
},
{
"epoch": 0.2976190476190476,
"grad_norm": 10.600458145141602,
"learning_rate": 9.893333333333334e-06,
"loss": 0.622,
"step": 550
},
{
"epoch": 0.31114718614718617,
"grad_norm": 10.680913925170898,
"learning_rate": 9.837777777777778e-06,
"loss": 0.6115,
"step": 575
},
{
"epoch": 0.3246753246753247,
"grad_norm": 8.21044635772705,
"learning_rate": 9.782222222222222e-06,
"loss": 0.6219,
"step": 600
},
{
"epoch": 0.33820346320346323,
"grad_norm": 8.558358192443848,
"learning_rate": 9.726666666666668e-06,
"loss": 0.5936,
"step": 625
},
{
"epoch": 0.35173160173160173,
"grad_norm": 6.6742095947265625,
"learning_rate": 9.671111111111112e-06,
"loss": 0.5871,
"step": 650
},
{
"epoch": 0.3652597402597403,
"grad_norm": 8.321733474731445,
"learning_rate": 9.615555555555558e-06,
"loss": 0.5767,
"step": 675
},
{
"epoch": 0.3787878787878788,
"grad_norm": 8.88350772857666,
"learning_rate": 9.56e-06,
"loss": 0.5387,
"step": 700
},
{
"epoch": 0.39231601731601734,
"grad_norm": 8.155245780944824,
"learning_rate": 9.504444444444446e-06,
"loss": 0.5809,
"step": 725
},
{
"epoch": 0.40584415584415584,
"grad_norm": 8.907155990600586,
"learning_rate": 9.44888888888889e-06,
"loss": 0.5778,
"step": 750
},
{
"epoch": 0.4193722943722944,
"grad_norm": 9.427032470703125,
"learning_rate": 9.393333333333334e-06,
"loss": 0.5776,
"step": 775
},
{
"epoch": 0.4329004329004329,
"grad_norm": 6.904598236083984,
"learning_rate": 9.33777777777778e-06,
"loss": 0.5287,
"step": 800
},
{
"epoch": 0.44642857142857145,
"grad_norm": 7.866734504699707,
"learning_rate": 9.282222222222222e-06,
"loss": 0.5555,
"step": 825
},
{
"epoch": 0.45995670995670995,
"grad_norm": 7.301151752471924,
"learning_rate": 9.226666666666668e-06,
"loss": 0.5296,
"step": 850
},
{
"epoch": 0.4734848484848485,
"grad_norm": 9.370705604553223,
"learning_rate": 9.171111111111112e-06,
"loss": 0.5516,
"step": 875
},
{
"epoch": 0.487012987012987,
"grad_norm": 7.46251916885376,
"learning_rate": 9.115555555555556e-06,
"loss": 0.534,
"step": 900
},
{
"epoch": 0.5005411255411255,
"grad_norm": 7.885534286499023,
"learning_rate": 9.060000000000001e-06,
"loss": 0.5113,
"step": 925
},
{
"epoch": 0.5140692640692641,
"grad_norm": 6.12823486328125,
"learning_rate": 9.004444444444445e-06,
"loss": 0.5274,
"step": 950
},
{
"epoch": 0.5275974025974026,
"grad_norm": 7.1373515129089355,
"learning_rate": 8.94888888888889e-06,
"loss": 0.5241,
"step": 975
},
{
"epoch": 0.5411255411255411,
"grad_norm": 7.099331378936768,
"learning_rate": 8.893333333333333e-06,
"loss": 0.5152,
"step": 1000
},
{
"epoch": 0.5411255411255411,
"eval_loss": 0.4954243302345276,
"eval_runtime": 1779.4215,
"eval_samples_per_second": 2.192,
"eval_steps_per_second": 0.137,
"eval_wer": 0.3535207186322805,
"step": 1000
},
{
"epoch": 0.5546536796536796,
"grad_norm": 8.30470085144043,
"learning_rate": 8.83777777777778e-06,
"loss": 0.531,
"step": 1025
},
{
"epoch": 0.5681818181818182,
"grad_norm": 6.959774971008301,
"learning_rate": 8.782222222222223e-06,
"loss": 0.5038,
"step": 1050
},
{
"epoch": 0.5817099567099567,
"grad_norm": 7.844577789306641,
"learning_rate": 8.726666666666667e-06,
"loss": 0.5196,
"step": 1075
},
{
"epoch": 0.5952380952380952,
"grad_norm": 6.599257946014404,
"learning_rate": 8.671111111111113e-06,
"loss": 0.4982,
"step": 1100
},
{
"epoch": 0.6087662337662337,
"grad_norm": 5.671600818634033,
"learning_rate": 8.615555555555555e-06,
"loss": 0.497,
"step": 1125
},
{
"epoch": 0.6222943722943723,
"grad_norm": 6.545307636260986,
"learning_rate": 8.560000000000001e-06,
"loss": 0.4875,
"step": 1150
},
{
"epoch": 0.6358225108225108,
"grad_norm": 6.877360820770264,
"learning_rate": 8.504444444444445e-06,
"loss": 0.5162,
"step": 1175
},
{
"epoch": 0.6493506493506493,
"grad_norm": 7.325205326080322,
"learning_rate": 8.448888888888889e-06,
"loss": 0.5151,
"step": 1200
},
{
"epoch": 0.6628787878787878,
"grad_norm": 6.775233745574951,
"learning_rate": 8.393333333333335e-06,
"loss": 0.498,
"step": 1225
},
{
"epoch": 0.6764069264069265,
"grad_norm": 7.457151412963867,
"learning_rate": 8.337777777777777e-06,
"loss": 0.5012,
"step": 1250
},
{
"epoch": 0.689935064935065,
"grad_norm": 7.285881042480469,
"learning_rate": 8.282222222222223e-06,
"loss": 0.4684,
"step": 1275
},
{
"epoch": 0.7034632034632035,
"grad_norm": 9.163443565368652,
"learning_rate": 8.226666666666667e-06,
"loss": 0.5079,
"step": 1300
},
{
"epoch": 0.716991341991342,
"grad_norm": 7.168745994567871,
"learning_rate": 8.171111111111113e-06,
"loss": 0.475,
"step": 1325
},
{
"epoch": 0.7305194805194806,
"grad_norm": 7.457499027252197,
"learning_rate": 8.115555555555557e-06,
"loss": 0.488,
"step": 1350
},
{
"epoch": 0.7440476190476191,
"grad_norm": 6.2372846603393555,
"learning_rate": 8.06e-06,
"loss": 0.4822,
"step": 1375
},
{
"epoch": 0.7575757575757576,
"grad_norm": 5.880990505218506,
"learning_rate": 8.004444444444445e-06,
"loss": 0.531,
"step": 1400
},
{
"epoch": 0.7711038961038961,
"grad_norm": 7.057967185974121,
"learning_rate": 7.948888888888889e-06,
"loss": 0.4872,
"step": 1425
},
{
"epoch": 0.7846320346320347,
"grad_norm": 7.299345970153809,
"learning_rate": 7.893333333333335e-06,
"loss": 0.4749,
"step": 1450
},
{
"epoch": 0.7981601731601732,
"grad_norm": 6.807291030883789,
"learning_rate": 7.837777777777779e-06,
"loss": 0.4676,
"step": 1475
},
{
"epoch": 0.8116883116883117,
"grad_norm": 5.556617736816406,
"learning_rate": 7.782222222222223e-06,
"loss": 0.4614,
"step": 1500
},
{
"epoch": 0.8252164502164502,
"grad_norm": 6.165937900543213,
"learning_rate": 7.726666666666667e-06,
"loss": 0.4459,
"step": 1525
},
{
"epoch": 0.8387445887445888,
"grad_norm": 6.99851655960083,
"learning_rate": 7.67111111111111e-06,
"loss": 0.4734,
"step": 1550
},
{
"epoch": 0.8522727272727273,
"grad_norm": 7.385776519775391,
"learning_rate": 7.6155555555555564e-06,
"loss": 0.4547,
"step": 1575
},
{
"epoch": 0.8658008658008658,
"grad_norm": 6.626092910766602,
"learning_rate": 7.5600000000000005e-06,
"loss": 0.4462,
"step": 1600
},
{
"epoch": 0.8793290043290043,
"grad_norm": 6.563342094421387,
"learning_rate": 7.504444444444445e-06,
"loss": 0.4511,
"step": 1625
},
{
"epoch": 0.8928571428571429,
"grad_norm": 9.904861450195312,
"learning_rate": 7.44888888888889e-06,
"loss": 0.4728,
"step": 1650
},
{
"epoch": 0.9063852813852814,
"grad_norm": 7.5107622146606445,
"learning_rate": 7.393333333333333e-06,
"loss": 0.4867,
"step": 1675
},
{
"epoch": 0.9199134199134199,
"grad_norm": 6.618627548217773,
"learning_rate": 7.337777777777778e-06,
"loss": 0.4512,
"step": 1700
},
{
"epoch": 0.9334415584415584,
"grad_norm": 7.19182014465332,
"learning_rate": 7.282222222222222e-06,
"loss": 0.4657,
"step": 1725
},
{
"epoch": 0.946969696969697,
"grad_norm": 6.207240104675293,
"learning_rate": 7.226666666666667e-06,
"loss": 0.4455,
"step": 1750
},
{
"epoch": 0.9604978354978355,
"grad_norm": 8.109068870544434,
"learning_rate": 7.171111111111112e-06,
"loss": 0.4744,
"step": 1775
},
{
"epoch": 0.974025974025974,
"grad_norm": 7.550827503204346,
"learning_rate": 7.115555555555557e-06,
"loss": 0.4565,
"step": 1800
},
{
"epoch": 0.9875541125541125,
"grad_norm": 5.667859077453613,
"learning_rate": 7.06e-06,
"loss": 0.4368,
"step": 1825
},
{
"epoch": 1.001082251082251,
"grad_norm": 5.609886646270752,
"learning_rate": 7.004444444444445e-06,
"loss": 0.4406,
"step": 1850
},
{
"epoch": 1.0146103896103895,
"grad_norm": 4.862238883972168,
"learning_rate": 6.948888888888889e-06,
"loss": 0.3379,
"step": 1875
},
{
"epoch": 1.0281385281385282,
"grad_norm": 6.2563066482543945,
"learning_rate": 6.893333333333334e-06,
"loss": 0.3386,
"step": 1900
},
{
"epoch": 1.0416666666666667,
"grad_norm": 6.764842987060547,
"learning_rate": 6.837777777777779e-06,
"loss": 0.3534,
"step": 1925
},
{
"epoch": 1.0551948051948052,
"grad_norm": 5.962332248687744,
"learning_rate": 6.782222222222222e-06,
"loss": 0.3212,
"step": 1950
},
{
"epoch": 1.0687229437229437,
"grad_norm": 5.471970081329346,
"learning_rate": 6.726666666666667e-06,
"loss": 0.3572,
"step": 1975
},
{
"epoch": 1.0822510822510822,
"grad_norm": 6.054861545562744,
"learning_rate": 6.671111111111112e-06,
"loss": 0.3339,
"step": 2000
},
{
"epoch": 1.0822510822510822,
"eval_loss": 0.42054763436317444,
"eval_runtime": 1782.8878,
"eval_samples_per_second": 2.188,
"eval_steps_per_second": 0.137,
"eval_wer": 0.3197865353037767,
"step": 2000
},
{
"epoch": 1.0957792207792207,
"grad_norm": 6.194203853607178,
"learning_rate": 6.615555555555556e-06,
"loss": 0.339,
"step": 2025
},
{
"epoch": 1.1093073593073592,
"grad_norm": 5.470515727996826,
"learning_rate": 6.560000000000001e-06,
"loss": 0.3375,
"step": 2050
},
{
"epoch": 1.1228354978354977,
"grad_norm": 5.1414618492126465,
"learning_rate": 6.504444444444446e-06,
"loss": 0.3348,
"step": 2075
},
{
"epoch": 1.1363636363636362,
"grad_norm": 5.000445365905762,
"learning_rate": 6.448888888888889e-06,
"loss": 0.3255,
"step": 2100
},
{
"epoch": 1.149891774891775,
"grad_norm": 5.545360088348389,
"learning_rate": 6.393333333333334e-06,
"loss": 0.3296,
"step": 2125
},
{
"epoch": 1.1634199134199135,
"grad_norm": 5.920198440551758,
"learning_rate": 6.3377777777777786e-06,
"loss": 0.3436,
"step": 2150
},
{
"epoch": 1.176948051948052,
"grad_norm": 5.722521781921387,
"learning_rate": 6.282222222222223e-06,
"loss": 0.3366,
"step": 2175
},
{
"epoch": 1.1904761904761905,
"grad_norm": 6.066483020782471,
"learning_rate": 6.2266666666666675e-06,
"loss": 0.332,
"step": 2200
},
{
"epoch": 1.204004329004329,
"grad_norm": 6.301929473876953,
"learning_rate": 6.171111111111112e-06,
"loss": 0.3207,
"step": 2225
},
{
"epoch": 1.2175324675324675,
"grad_norm": 5.607754230499268,
"learning_rate": 6.1155555555555555e-06,
"loss": 0.3338,
"step": 2250
},
{
"epoch": 1.231060606060606,
"grad_norm": 5.145053863525391,
"learning_rate": 6.0600000000000004e-06,
"loss": 0.3268,
"step": 2275
},
{
"epoch": 1.2445887445887447,
"grad_norm": 5.448360443115234,
"learning_rate": 6.004444444444445e-06,
"loss": 0.3365,
"step": 2300
},
{
"epoch": 1.2581168831168832,
"grad_norm": 5.5156474113464355,
"learning_rate": 5.948888888888889e-06,
"loss": 0.3268,
"step": 2325
},
{
"epoch": 1.2716450216450217,
"grad_norm": 5.252381324768066,
"learning_rate": 5.893333333333334e-06,
"loss": 0.3228,
"step": 2350
},
{
"epoch": 1.2851731601731602,
"grad_norm": 5.7689313888549805,
"learning_rate": 5.837777777777777e-06,
"loss": 0.3304,
"step": 2375
},
{
"epoch": 1.2987012987012987,
"grad_norm": 4.822956085205078,
"learning_rate": 5.782222222222222e-06,
"loss": 0.3066,
"step": 2400
},
{
"epoch": 1.3122294372294372,
"grad_norm": 5.012087345123291,
"learning_rate": 5.726666666666667e-06,
"loss": 0.3323,
"step": 2425
},
{
"epoch": 1.3257575757575757,
"grad_norm": 5.262439250946045,
"learning_rate": 5.671111111111112e-06,
"loss": 0.3256,
"step": 2450
},
{
"epoch": 1.3392857142857144,
"grad_norm": 5.300339221954346,
"learning_rate": 5.615555555555556e-06,
"loss": 0.3287,
"step": 2475
},
{
"epoch": 1.3528138528138527,
"grad_norm": 6.058621883392334,
"learning_rate": 5.560000000000001e-06,
"loss": 0.3283,
"step": 2500
},
{
"epoch": 1.3663419913419914,
"grad_norm": 6.223220348358154,
"learning_rate": 5.504444444444444e-06,
"loss": 0.3288,
"step": 2525
},
{
"epoch": 1.37987012987013,
"grad_norm": 5.865265369415283,
"learning_rate": 5.448888888888889e-06,
"loss": 0.3303,
"step": 2550
},
{
"epoch": 1.3933982683982684,
"grad_norm": 4.715255260467529,
"learning_rate": 5.393333333333334e-06,
"loss": 0.3432,
"step": 2575
},
{
"epoch": 1.406926406926407,
"grad_norm": 5.57729434967041,
"learning_rate": 5.337777777777779e-06,
"loss": 0.3174,
"step": 2600
},
{
"epoch": 1.4204545454545454,
"grad_norm": 6.372653484344482,
"learning_rate": 5.282222222222223e-06,
"loss": 0.3251,
"step": 2625
},
{
"epoch": 1.433982683982684,
"grad_norm": 6.7026848793029785,
"learning_rate": 5.226666666666667e-06,
"loss": 0.3258,
"step": 2650
},
{
"epoch": 1.4475108225108224,
"grad_norm": 5.12203311920166,
"learning_rate": 5.171111111111111e-06,
"loss": 0.3217,
"step": 2675
},
{
"epoch": 1.4610389610389611,
"grad_norm": 7.778601169586182,
"learning_rate": 5.115555555555556e-06,
"loss": 0.3182,
"step": 2700
},
{
"epoch": 1.4745670995670996,
"grad_norm": 4.994805335998535,
"learning_rate": 5.060000000000001e-06,
"loss": 0.3142,
"step": 2725
},
{
"epoch": 1.4880952380952381,
"grad_norm": 6.392801761627197,
"learning_rate": 5.004444444444445e-06,
"loss": 0.3289,
"step": 2750
},
{
"epoch": 1.5016233766233766,
"grad_norm": 5.523842811584473,
"learning_rate": 4.94888888888889e-06,
"loss": 0.3248,
"step": 2775
},
{
"epoch": 1.5151515151515151,
"grad_norm": 5.348546981811523,
"learning_rate": 4.893333333333334e-06,
"loss": 0.303,
"step": 2800
},
{
"epoch": 1.5286796536796536,
"grad_norm": 5.714568614959717,
"learning_rate": 4.837777777777778e-06,
"loss": 0.3144,
"step": 2825
},
{
"epoch": 1.5422077922077921,
"grad_norm": 5.544715404510498,
"learning_rate": 4.7822222222222226e-06,
"loss": 0.3183,
"step": 2850
},
{
"epoch": 1.5557359307359309,
"grad_norm": 6.49782133102417,
"learning_rate": 4.7266666666666674e-06,
"loss": 0.2981,
"step": 2875
},
{
"epoch": 1.5692640692640691,
"grad_norm": 5.356492042541504,
"learning_rate": 4.6711111111111115e-06,
"loss": 0.3159,
"step": 2900
},
{
"epoch": 1.5827922077922079,
"grad_norm": 5.4491119384765625,
"learning_rate": 4.6155555555555555e-06,
"loss": 0.3333,
"step": 2925
},
{
"epoch": 1.5963203463203464,
"grad_norm": 5.832214832305908,
"learning_rate": 4.56e-06,
"loss": 0.3141,
"step": 2950
},
{
"epoch": 1.6098484848484849,
"grad_norm": 5.139626979827881,
"learning_rate": 4.504444444444444e-06,
"loss": 0.308,
"step": 2975
},
{
"epoch": 1.6233766233766234,
"grad_norm": 5.6519999504089355,
"learning_rate": 4.448888888888889e-06,
"loss": 0.3189,
"step": 3000
},
{
"epoch": 1.6233766233766234,
"eval_loss": 0.3910607397556305,
"eval_runtime": 1770.6955,
"eval_samples_per_second": 2.203,
"eval_steps_per_second": 0.138,
"eval_wer": 0.29134067420071474,
"step": 3000
},
{
"epoch": 1.6369047619047619,
"grad_norm": 5.791579246520996,
"learning_rate": 4.393333333333334e-06,
"loss": 0.3122,
"step": 3025
},
{
"epoch": 1.6504329004329006,
"grad_norm": 5.640200614929199,
"learning_rate": 4.337777777777778e-06,
"loss": 0.3062,
"step": 3050
},
{
"epoch": 1.6639610389610389,
"grad_norm": 5.585713863372803,
"learning_rate": 4.282222222222222e-06,
"loss": 0.3208,
"step": 3075
},
{
"epoch": 1.6774891774891776,
"grad_norm": 5.871087074279785,
"learning_rate": 4.226666666666667e-06,
"loss": 0.3071,
"step": 3100
},
{
"epoch": 1.6910173160173159,
"grad_norm": 5.412327766418457,
"learning_rate": 4.171111111111111e-06,
"loss": 0.3167,
"step": 3125
},
{
"epoch": 1.7045454545454546,
"grad_norm": 5.0698561668396,
"learning_rate": 4.115555555555556e-06,
"loss": 0.3231,
"step": 3150
},
{
"epoch": 1.718073593073593,
"grad_norm": 5.63693904876709,
"learning_rate": 4.060000000000001e-06,
"loss": 0.3128,
"step": 3175
},
{
"epoch": 1.7316017316017316,
"grad_norm": 5.766589164733887,
"learning_rate": 4.004444444444445e-06,
"loss": 0.3229,
"step": 3200
},
{
"epoch": 1.74512987012987,
"grad_norm": 5.414788246154785,
"learning_rate": 3.948888888888889e-06,
"loss": 0.2917,
"step": 3225
},
{
"epoch": 1.7586580086580086,
"grad_norm": 5.106072902679443,
"learning_rate": 3.893333333333333e-06,
"loss": 0.311,
"step": 3250
},
{
"epoch": 1.7721861471861473,
"grad_norm": 4.694611549377441,
"learning_rate": 3.837777777777778e-06,
"loss": 0.3228,
"step": 3275
},
{
"epoch": 1.7857142857142856,
"grad_norm": 6.422979354858398,
"learning_rate": 3.782222222222223e-06,
"loss": 0.314,
"step": 3300
},
{
"epoch": 1.7992424242424243,
"grad_norm": 5.5537567138671875,
"learning_rate": 3.726666666666667e-06,
"loss": 0.303,
"step": 3325
},
{
"epoch": 1.8127705627705628,
"grad_norm": 6.503033638000488,
"learning_rate": 3.6711111111111113e-06,
"loss": 0.336,
"step": 3350
},
{
"epoch": 1.8262987012987013,
"grad_norm": 5.406898021697998,
"learning_rate": 3.615555555555556e-06,
"loss": 0.3031,
"step": 3375
},
{
"epoch": 1.8398268398268398,
"grad_norm": 6.486941337585449,
"learning_rate": 3.5600000000000002e-06,
"loss": 0.3203,
"step": 3400
},
{
"epoch": 1.8533549783549783,
"grad_norm": 7.027703285217285,
"learning_rate": 3.5044444444444447e-06,
"loss": 0.3159,
"step": 3425
},
{
"epoch": 1.866883116883117,
"grad_norm": 5.475865364074707,
"learning_rate": 3.4488888888888896e-06,
"loss": 0.3239,
"step": 3450
},
{
"epoch": 1.8804112554112553,
"grad_norm": 6.124994277954102,
"learning_rate": 3.3933333333333336e-06,
"loss": 0.2928,
"step": 3475
},
{
"epoch": 1.893939393939394,
"grad_norm": 4.759301662445068,
"learning_rate": 3.337777777777778e-06,
"loss": 0.2862,
"step": 3500
},
{
"epoch": 1.9074675324675323,
"grad_norm": 5.548280239105225,
"learning_rate": 3.282222222222223e-06,
"loss": 0.312,
"step": 3525
},
{
"epoch": 1.920995670995671,
"grad_norm": 5.691162109375,
"learning_rate": 3.226666666666667e-06,
"loss": 0.3161,
"step": 3550
},
{
"epoch": 1.9345238095238095,
"grad_norm": 6.089394569396973,
"learning_rate": 3.1711111111111114e-06,
"loss": 0.3028,
"step": 3575
},
{
"epoch": 1.948051948051948,
"grad_norm": 5.725650310516357,
"learning_rate": 3.1155555555555555e-06,
"loss": 0.3058,
"step": 3600
},
{
"epoch": 1.9615800865800865,
"grad_norm": 5.124326705932617,
"learning_rate": 3.0600000000000003e-06,
"loss": 0.2947,
"step": 3625
},
{
"epoch": 1.975108225108225,
"grad_norm": 6.62967586517334,
"learning_rate": 3.004444444444445e-06,
"loss": 0.318,
"step": 3650
},
{
"epoch": 1.9886363636363638,
"grad_norm": 6.150094985961914,
"learning_rate": 2.948888888888889e-06,
"loss": 0.3257,
"step": 3675
},
{
"epoch": 2.002164502164502,
"grad_norm": 3.962730884552002,
"learning_rate": 2.8933333333333337e-06,
"loss": 0.29,
"step": 3700
},
{
"epoch": 2.0156926406926408,
"grad_norm": 3.999758005142212,
"learning_rate": 2.837777777777778e-06,
"loss": 0.2127,
"step": 3725
},
{
"epoch": 2.029220779220779,
"grad_norm": 4.3916015625,
"learning_rate": 2.7822222222222222e-06,
"loss": 0.2073,
"step": 3750
},
{
"epoch": 2.0427489177489178,
"grad_norm": 4.647676944732666,
"learning_rate": 2.726666666666667e-06,
"loss": 0.2065,
"step": 3775
},
{
"epoch": 2.0562770562770565,
"grad_norm": 5.331233501434326,
"learning_rate": 2.6711111111111116e-06,
"loss": 0.208,
"step": 3800
},
{
"epoch": 2.0698051948051948,
"grad_norm": 4.8974995613098145,
"learning_rate": 2.6155555555555556e-06,
"loss": 0.2007,
"step": 3825
},
{
"epoch": 2.0833333333333335,
"grad_norm": 4.972061634063721,
"learning_rate": 2.56e-06,
"loss": 0.2017,
"step": 3850
},
{
"epoch": 2.0968614718614718,
"grad_norm": 5.035933494567871,
"learning_rate": 2.504444444444445e-06,
"loss": 0.2075,
"step": 3875
},
{
"epoch": 2.1103896103896105,
"grad_norm": 6.620712757110596,
"learning_rate": 2.448888888888889e-06,
"loss": 0.2074,
"step": 3900
},
{
"epoch": 2.1239177489177488,
"grad_norm": 7.161535739898682,
"learning_rate": 2.3933333333333334e-06,
"loss": 0.2075,
"step": 3925
},
{
"epoch": 2.1374458874458875,
"grad_norm": 5.531479358673096,
"learning_rate": 2.337777777777778e-06,
"loss": 0.2088,
"step": 3950
},
{
"epoch": 2.150974025974026,
"grad_norm": 4.983880043029785,
"learning_rate": 2.2822222222222223e-06,
"loss": 0.2103,
"step": 3975
},
{
"epoch": 2.1645021645021645,
"grad_norm": 4.373054504394531,
"learning_rate": 2.226666666666667e-06,
"loss": 0.2051,
"step": 4000
},
{
"epoch": 2.1645021645021645,
"eval_loss": 0.38633546233177185,
"eval_runtime": 1768.38,
"eval_samples_per_second": 2.206,
"eval_steps_per_second": 0.138,
"eval_wer": 0.27895296049454266,
"step": 4000
},
{
"epoch": 2.178030303030303,
"grad_norm": 5.22249174118042,
"learning_rate": 2.1711111111111113e-06,
"loss": 0.1988,
"step": 4025
},
{
"epoch": 2.1915584415584415,
"grad_norm": 3.8173792362213135,
"learning_rate": 2.1155555555555557e-06,
"loss": 0.2241,
"step": 4050
},
{
"epoch": 2.20508658008658,
"grad_norm": 5.271940231323242,
"learning_rate": 2.06e-06,
"loss": 0.201,
"step": 4075
},
{
"epoch": 2.2186147186147185,
"grad_norm": 4.359199523925781,
"learning_rate": 2.0044444444444446e-06,
"loss": 0.1946,
"step": 4100
},
{
"epoch": 2.232142857142857,
"grad_norm": 4.675993919372559,
"learning_rate": 1.948888888888889e-06,
"loss": 0.2123,
"step": 4125
},
{
"epoch": 2.2456709956709955,
"grad_norm": 4.090628147125244,
"learning_rate": 1.8933333333333333e-06,
"loss": 0.2045,
"step": 4150
},
{
"epoch": 2.259199134199134,
"grad_norm": 3.5872035026550293,
"learning_rate": 1.837777777777778e-06,
"loss": 0.2042,
"step": 4175
},
{
"epoch": 2.2727272727272725,
"grad_norm": 4.375498294830322,
"learning_rate": 1.7822222222222225e-06,
"loss": 0.2056,
"step": 4200
},
{
"epoch": 2.286255411255411,
"grad_norm": 4.972550868988037,
"learning_rate": 1.7266666666666667e-06,
"loss": 0.2024,
"step": 4225
},
{
"epoch": 2.29978354978355,
"grad_norm": 4.7940168380737305,
"learning_rate": 1.6711111111111112e-06,
"loss": 0.1996,
"step": 4250
},
{
"epoch": 2.313311688311688,
"grad_norm": 4.399414539337158,
"learning_rate": 1.6155555555555559e-06,
"loss": 0.2101,
"step": 4275
},
{
"epoch": 2.326839826839827,
"grad_norm": 5.292896747589111,
"learning_rate": 1.56e-06,
"loss": 0.1929,
"step": 4300
},
{
"epoch": 2.340367965367965,
"grad_norm": 4.333370685577393,
"learning_rate": 1.5044444444444446e-06,
"loss": 0.208,
"step": 4325
},
{
"epoch": 2.353896103896104,
"grad_norm": 4.69057035446167,
"learning_rate": 1.4488888888888892e-06,
"loss": 0.1905,
"step": 4350
},
{
"epoch": 2.367424242424242,
"grad_norm": 4.56622838973999,
"learning_rate": 1.3933333333333335e-06,
"loss": 0.2051,
"step": 4375
},
{
"epoch": 2.380952380952381,
"grad_norm": 4.605253219604492,
"learning_rate": 1.337777777777778e-06,
"loss": 0.1978,
"step": 4400
},
{
"epoch": 2.3944805194805197,
"grad_norm": 5.727032661437988,
"learning_rate": 1.2822222222222222e-06,
"loss": 0.2002,
"step": 4425
},
{
"epoch": 2.408008658008658,
"grad_norm": 5.882457256317139,
"learning_rate": 1.2266666666666666e-06,
"loss": 0.202,
"step": 4450
},
{
"epoch": 2.4215367965367967,
"grad_norm": 4.464743614196777,
"learning_rate": 1.171111111111111e-06,
"loss": 0.2145,
"step": 4475
},
{
"epoch": 2.435064935064935,
"grad_norm": 4.503987789154053,
"learning_rate": 1.1155555555555558e-06,
"loss": 0.2101,
"step": 4500
},
{
"epoch": 2.4485930735930737,
"grad_norm": 5.735741138458252,
"learning_rate": 1.06e-06,
"loss": 0.1913,
"step": 4525
},
{
"epoch": 2.462121212121212,
"grad_norm": 4.6319098472595215,
"learning_rate": 1.0044444444444445e-06,
"loss": 0.2001,
"step": 4550
},
{
"epoch": 2.4756493506493507,
"grad_norm": 5.589540958404541,
"learning_rate": 9.488888888888889e-07,
"loss": 0.1981,
"step": 4575
},
{
"epoch": 2.4891774891774894,
"grad_norm": 4.481135845184326,
"learning_rate": 8.933333333333334e-07,
"loss": 0.198,
"step": 4600
},
{
"epoch": 2.5027056277056277,
"grad_norm": 6.087165355682373,
"learning_rate": 8.37777777777778e-07,
"loss": 0.2131,
"step": 4625
},
{
"epoch": 2.5162337662337664,
"grad_norm": 4.635289669036865,
"learning_rate": 7.822222222222223e-07,
"loss": 0.2088,
"step": 4650
},
{
"epoch": 2.5297619047619047,
"grad_norm": 4.698585510253906,
"learning_rate": 7.266666666666668e-07,
"loss": 0.2057,
"step": 4675
},
{
"epoch": 2.5432900432900434,
"grad_norm": 4.562716960906982,
"learning_rate": 6.711111111111111e-07,
"loss": 0.2117,
"step": 4700
},
{
"epoch": 2.5568181818181817,
"grad_norm": 5.381985187530518,
"learning_rate": 6.155555555555556e-07,
"loss": 0.1975,
"step": 4725
},
{
"epoch": 2.5703463203463204,
"grad_norm": 5.667773723602295,
"learning_rate": 5.6e-07,
"loss": 0.2409,
"step": 4750
},
{
"epoch": 2.583874458874459,
"grad_norm": 4.565330982208252,
"learning_rate": 5.044444444444445e-07,
"loss": 0.1915,
"step": 4775
},
{
"epoch": 2.5974025974025974,
"grad_norm": 5.17742395401001,
"learning_rate": 4.488888888888889e-07,
"loss": 0.1973,
"step": 4800
},
{
"epoch": 2.6109307359307357,
"grad_norm": 4.878474712371826,
"learning_rate": 3.9333333333333336e-07,
"loss": 0.2209,
"step": 4825
},
{
"epoch": 2.6244588744588744,
"grad_norm": 5.2556328773498535,
"learning_rate": 3.3777777777777777e-07,
"loss": 0.204,
"step": 4850
},
{
"epoch": 2.637987012987013,
"grad_norm": 3.8071792125701904,
"learning_rate": 2.822222222222222e-07,
"loss": 0.2052,
"step": 4875
},
{
"epoch": 2.6515151515151514,
"grad_norm": 4.218277454376221,
"learning_rate": 2.266666666666667e-07,
"loss": 0.1989,
"step": 4900
},
{
"epoch": 2.66504329004329,
"grad_norm": 5.260907173156738,
"learning_rate": 1.7111111111111114e-07,
"loss": 0.1915,
"step": 4925
},
{
"epoch": 2.678571428571429,
"grad_norm": 4.497314453125,
"learning_rate": 1.1555555555555556e-07,
"loss": 0.1997,
"step": 4950
},
{
"epoch": 2.692099567099567,
"grad_norm": 4.543353080749512,
"learning_rate": 6.000000000000001e-08,
"loss": 0.1866,
"step": 4975
},
{
"epoch": 2.7056277056277054,
"grad_norm": 6.265724182128906,
"learning_rate": 4.444444444444445e-09,
"loss": 0.202,
"step": 5000
},
{
"epoch": 2.7056277056277054,
"eval_loss": 0.38099274039268494,
"eval_runtime": 1768.0451,
"eval_samples_per_second": 2.206,
"eval_steps_per_second": 0.138,
"eval_wer": 0.27501690331304934,
"step": 5000
},
{
"epoch": 2.7056277056277054,
"step": 5000,
"total_flos": 5.434978041004032e+20,
"train_loss": 0.39443905401229856,
"train_runtime": 57517.0908,
"train_samples_per_second": 2.782,
"train_steps_per_second": 0.087
}
],
"logging_steps": 25,
"max_steps": 5000,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.434978041004032e+20,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}