|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.7056277056277054, |
|
"eval_steps": 1000, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.013528138528138528, |
|
"grad_norm": 18.23776626586914, |
|
"learning_rate": 4.800000000000001e-07, |
|
"loss": 1.7881, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.027056277056277056, |
|
"grad_norm": 11.490796089172363, |
|
"learning_rate": 9.800000000000001e-07, |
|
"loss": 1.4477, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.040584415584415584, |
|
"grad_norm": 10.601150512695312, |
|
"learning_rate": 1.48e-06, |
|
"loss": 1.0989, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.05411255411255411, |
|
"grad_norm": 12.294251441955566, |
|
"learning_rate": 1.98e-06, |
|
"loss": 0.9367, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06764069264069264, |
|
"grad_norm": 11.920494079589844, |
|
"learning_rate": 2.4800000000000004e-06, |
|
"loss": 0.8698, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.08116883116883117, |
|
"grad_norm": 11.758705139160156, |
|
"learning_rate": 2.9800000000000003e-06, |
|
"loss": 0.8531, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0946969696969697, |
|
"grad_norm": 11.037556648254395, |
|
"learning_rate": 3.48e-06, |
|
"loss": 0.812, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.10822510822510822, |
|
"grad_norm": 10.065262794494629, |
|
"learning_rate": 3.980000000000001e-06, |
|
"loss": 0.7987, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12175324675324675, |
|
"grad_norm": 9.124336242675781, |
|
"learning_rate": 4.48e-06, |
|
"loss": 0.7455, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.13528138528138528, |
|
"grad_norm": 10.971399307250977, |
|
"learning_rate": 4.980000000000001e-06, |
|
"loss": 0.7564, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1488095238095238, |
|
"grad_norm": 9.226423263549805, |
|
"learning_rate": 5.480000000000001e-06, |
|
"loss": 0.7163, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.16233766233766234, |
|
"grad_norm": 9.523015022277832, |
|
"learning_rate": 5.98e-06, |
|
"loss": 0.7341, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.17586580086580086, |
|
"grad_norm": 8.390256881713867, |
|
"learning_rate": 6.480000000000001e-06, |
|
"loss": 0.7301, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.1893939393939394, |
|
"grad_norm": 9.996743202209473, |
|
"learning_rate": 6.98e-06, |
|
"loss": 0.6897, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.20292207792207792, |
|
"grad_norm": 9.470787048339844, |
|
"learning_rate": 7.48e-06, |
|
"loss": 0.6728, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.21645021645021645, |
|
"grad_norm": 8.221435546875, |
|
"learning_rate": 7.980000000000002e-06, |
|
"loss": 0.6853, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.22997835497835498, |
|
"grad_norm": 9.243407249450684, |
|
"learning_rate": 8.48e-06, |
|
"loss": 0.6899, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.2435064935064935, |
|
"grad_norm": 8.308032989501953, |
|
"learning_rate": 8.96e-06, |
|
"loss": 0.634, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.25703463203463206, |
|
"grad_norm": 8.970362663269043, |
|
"learning_rate": 9.460000000000001e-06, |
|
"loss": 0.6191, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.27056277056277056, |
|
"grad_norm": 9.167222023010254, |
|
"learning_rate": 9.960000000000001e-06, |
|
"loss": 0.6348, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2840909090909091, |
|
"grad_norm": 8.824662208557129, |
|
"learning_rate": 9.94888888888889e-06, |
|
"loss": 0.6228, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.2976190476190476, |
|
"grad_norm": 10.600458145141602, |
|
"learning_rate": 9.893333333333334e-06, |
|
"loss": 0.622, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.31114718614718617, |
|
"grad_norm": 10.680913925170898, |
|
"learning_rate": 9.837777777777778e-06, |
|
"loss": 0.6115, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.3246753246753247, |
|
"grad_norm": 8.21044635772705, |
|
"learning_rate": 9.782222222222222e-06, |
|
"loss": 0.6219, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.33820346320346323, |
|
"grad_norm": 8.558358192443848, |
|
"learning_rate": 9.726666666666668e-06, |
|
"loss": 0.5936, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.35173160173160173, |
|
"grad_norm": 6.6742095947265625, |
|
"learning_rate": 9.671111111111112e-06, |
|
"loss": 0.5871, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.3652597402597403, |
|
"grad_norm": 8.321733474731445, |
|
"learning_rate": 9.615555555555558e-06, |
|
"loss": 0.5767, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.3787878787878788, |
|
"grad_norm": 8.88350772857666, |
|
"learning_rate": 9.56e-06, |
|
"loss": 0.5387, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.39231601731601734, |
|
"grad_norm": 8.155245780944824, |
|
"learning_rate": 9.504444444444446e-06, |
|
"loss": 0.5809, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.40584415584415584, |
|
"grad_norm": 8.907155990600586, |
|
"learning_rate": 9.44888888888889e-06, |
|
"loss": 0.5778, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.4193722943722944, |
|
"grad_norm": 9.427032470703125, |
|
"learning_rate": 9.393333333333334e-06, |
|
"loss": 0.5776, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.4329004329004329, |
|
"grad_norm": 6.904598236083984, |
|
"learning_rate": 9.33777777777778e-06, |
|
"loss": 0.5287, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.44642857142857145, |
|
"grad_norm": 7.866734504699707, |
|
"learning_rate": 9.282222222222222e-06, |
|
"loss": 0.5555, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.45995670995670995, |
|
"grad_norm": 7.301151752471924, |
|
"learning_rate": 9.226666666666668e-06, |
|
"loss": 0.5296, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.4734848484848485, |
|
"grad_norm": 9.370705604553223, |
|
"learning_rate": 9.171111111111112e-06, |
|
"loss": 0.5516, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.487012987012987, |
|
"grad_norm": 7.46251916885376, |
|
"learning_rate": 9.115555555555556e-06, |
|
"loss": 0.534, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.5005411255411255, |
|
"grad_norm": 7.885534286499023, |
|
"learning_rate": 9.060000000000001e-06, |
|
"loss": 0.5113, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.5140692640692641, |
|
"grad_norm": 6.12823486328125, |
|
"learning_rate": 9.004444444444445e-06, |
|
"loss": 0.5274, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5275974025974026, |
|
"grad_norm": 7.1373515129089355, |
|
"learning_rate": 8.94888888888889e-06, |
|
"loss": 0.5241, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.5411255411255411, |
|
"grad_norm": 7.099331378936768, |
|
"learning_rate": 8.893333333333333e-06, |
|
"loss": 0.5152, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5411255411255411, |
|
"eval_loss": 0.4954243302345276, |
|
"eval_runtime": 1779.4215, |
|
"eval_samples_per_second": 2.192, |
|
"eval_steps_per_second": 0.137, |
|
"eval_wer": 0.3535207186322805, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5546536796536796, |
|
"grad_norm": 8.30470085144043, |
|
"learning_rate": 8.83777777777778e-06, |
|
"loss": 0.531, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.5681818181818182, |
|
"grad_norm": 6.959774971008301, |
|
"learning_rate": 8.782222222222223e-06, |
|
"loss": 0.5038, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.5817099567099567, |
|
"grad_norm": 7.844577789306641, |
|
"learning_rate": 8.726666666666667e-06, |
|
"loss": 0.5196, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.5952380952380952, |
|
"grad_norm": 6.599257946014404, |
|
"learning_rate": 8.671111111111113e-06, |
|
"loss": 0.4982, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.6087662337662337, |
|
"grad_norm": 5.671600818634033, |
|
"learning_rate": 8.615555555555555e-06, |
|
"loss": 0.497, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.6222943722943723, |
|
"grad_norm": 6.545307636260986, |
|
"learning_rate": 8.560000000000001e-06, |
|
"loss": 0.4875, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.6358225108225108, |
|
"grad_norm": 6.877360820770264, |
|
"learning_rate": 8.504444444444445e-06, |
|
"loss": 0.5162, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.6493506493506493, |
|
"grad_norm": 7.325205326080322, |
|
"learning_rate": 8.448888888888889e-06, |
|
"loss": 0.5151, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6628787878787878, |
|
"grad_norm": 6.775233745574951, |
|
"learning_rate": 8.393333333333335e-06, |
|
"loss": 0.498, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.6764069264069265, |
|
"grad_norm": 7.457151412963867, |
|
"learning_rate": 8.337777777777777e-06, |
|
"loss": 0.5012, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.689935064935065, |
|
"grad_norm": 7.285881042480469, |
|
"learning_rate": 8.282222222222223e-06, |
|
"loss": 0.4684, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.7034632034632035, |
|
"grad_norm": 9.163443565368652, |
|
"learning_rate": 8.226666666666667e-06, |
|
"loss": 0.5079, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.716991341991342, |
|
"grad_norm": 7.168745994567871, |
|
"learning_rate": 8.171111111111113e-06, |
|
"loss": 0.475, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.7305194805194806, |
|
"grad_norm": 7.457499027252197, |
|
"learning_rate": 8.115555555555557e-06, |
|
"loss": 0.488, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.7440476190476191, |
|
"grad_norm": 6.2372846603393555, |
|
"learning_rate": 8.06e-06, |
|
"loss": 0.4822, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.7575757575757576, |
|
"grad_norm": 5.880990505218506, |
|
"learning_rate": 8.004444444444445e-06, |
|
"loss": 0.531, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.7711038961038961, |
|
"grad_norm": 7.057967185974121, |
|
"learning_rate": 7.948888888888889e-06, |
|
"loss": 0.4872, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.7846320346320347, |
|
"grad_norm": 7.299345970153809, |
|
"learning_rate": 7.893333333333335e-06, |
|
"loss": 0.4749, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.7981601731601732, |
|
"grad_norm": 6.807291030883789, |
|
"learning_rate": 7.837777777777779e-06, |
|
"loss": 0.4676, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.8116883116883117, |
|
"grad_norm": 5.556617736816406, |
|
"learning_rate": 7.782222222222223e-06, |
|
"loss": 0.4614, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8252164502164502, |
|
"grad_norm": 6.165937900543213, |
|
"learning_rate": 7.726666666666667e-06, |
|
"loss": 0.4459, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.8387445887445888, |
|
"grad_norm": 6.99851655960083, |
|
"learning_rate": 7.67111111111111e-06, |
|
"loss": 0.4734, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.8522727272727273, |
|
"grad_norm": 7.385776519775391, |
|
"learning_rate": 7.6155555555555564e-06, |
|
"loss": 0.4547, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.8658008658008658, |
|
"grad_norm": 6.626092910766602, |
|
"learning_rate": 7.5600000000000005e-06, |
|
"loss": 0.4462, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.8793290043290043, |
|
"grad_norm": 6.563342094421387, |
|
"learning_rate": 7.504444444444445e-06, |
|
"loss": 0.4511, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.8928571428571429, |
|
"grad_norm": 9.904861450195312, |
|
"learning_rate": 7.44888888888889e-06, |
|
"loss": 0.4728, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.9063852813852814, |
|
"grad_norm": 7.5107622146606445, |
|
"learning_rate": 7.393333333333333e-06, |
|
"loss": 0.4867, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.9199134199134199, |
|
"grad_norm": 6.618627548217773, |
|
"learning_rate": 7.337777777777778e-06, |
|
"loss": 0.4512, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.9334415584415584, |
|
"grad_norm": 7.19182014465332, |
|
"learning_rate": 7.282222222222222e-06, |
|
"loss": 0.4657, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.946969696969697, |
|
"grad_norm": 6.207240104675293, |
|
"learning_rate": 7.226666666666667e-06, |
|
"loss": 0.4455, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.9604978354978355, |
|
"grad_norm": 8.109068870544434, |
|
"learning_rate": 7.171111111111112e-06, |
|
"loss": 0.4744, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.974025974025974, |
|
"grad_norm": 7.550827503204346, |
|
"learning_rate": 7.115555555555557e-06, |
|
"loss": 0.4565, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9875541125541125, |
|
"grad_norm": 5.667859077453613, |
|
"learning_rate": 7.06e-06, |
|
"loss": 0.4368, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 1.001082251082251, |
|
"grad_norm": 5.609886646270752, |
|
"learning_rate": 7.004444444444445e-06, |
|
"loss": 0.4406, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 1.0146103896103895, |
|
"grad_norm": 4.862238883972168, |
|
"learning_rate": 6.948888888888889e-06, |
|
"loss": 0.3379, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 1.0281385281385282, |
|
"grad_norm": 6.2563066482543945, |
|
"learning_rate": 6.893333333333334e-06, |
|
"loss": 0.3386, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.0416666666666667, |
|
"grad_norm": 6.764842987060547, |
|
"learning_rate": 6.837777777777779e-06, |
|
"loss": 0.3534, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 1.0551948051948052, |
|
"grad_norm": 5.962332248687744, |
|
"learning_rate": 6.782222222222222e-06, |
|
"loss": 0.3212, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.0687229437229437, |
|
"grad_norm": 5.471970081329346, |
|
"learning_rate": 6.726666666666667e-06, |
|
"loss": 0.3572, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 1.0822510822510822, |
|
"grad_norm": 6.054861545562744, |
|
"learning_rate": 6.671111111111112e-06, |
|
"loss": 0.3339, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.0822510822510822, |
|
"eval_loss": 0.42054763436317444, |
|
"eval_runtime": 1782.8878, |
|
"eval_samples_per_second": 2.188, |
|
"eval_steps_per_second": 0.137, |
|
"eval_wer": 0.3197865353037767, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.0957792207792207, |
|
"grad_norm": 6.194203853607178, |
|
"learning_rate": 6.615555555555556e-06, |
|
"loss": 0.339, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 1.1093073593073592, |
|
"grad_norm": 5.470515727996826, |
|
"learning_rate": 6.560000000000001e-06, |
|
"loss": 0.3375, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.1228354978354977, |
|
"grad_norm": 5.1414618492126465, |
|
"learning_rate": 6.504444444444446e-06, |
|
"loss": 0.3348, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 1.1363636363636362, |
|
"grad_norm": 5.000445365905762, |
|
"learning_rate": 6.448888888888889e-06, |
|
"loss": 0.3255, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.149891774891775, |
|
"grad_norm": 5.545360088348389, |
|
"learning_rate": 6.393333333333334e-06, |
|
"loss": 0.3296, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 1.1634199134199135, |
|
"grad_norm": 5.920198440551758, |
|
"learning_rate": 6.3377777777777786e-06, |
|
"loss": 0.3436, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.176948051948052, |
|
"grad_norm": 5.722521781921387, |
|
"learning_rate": 6.282222222222223e-06, |
|
"loss": 0.3366, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 1.1904761904761905, |
|
"grad_norm": 6.066483020782471, |
|
"learning_rate": 6.2266666666666675e-06, |
|
"loss": 0.332, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.204004329004329, |
|
"grad_norm": 6.301929473876953, |
|
"learning_rate": 6.171111111111112e-06, |
|
"loss": 0.3207, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 1.2175324675324675, |
|
"grad_norm": 5.607754230499268, |
|
"learning_rate": 6.1155555555555555e-06, |
|
"loss": 0.3338, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.231060606060606, |
|
"grad_norm": 5.145053863525391, |
|
"learning_rate": 6.0600000000000004e-06, |
|
"loss": 0.3268, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 1.2445887445887447, |
|
"grad_norm": 5.448360443115234, |
|
"learning_rate": 6.004444444444445e-06, |
|
"loss": 0.3365, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.2581168831168832, |
|
"grad_norm": 5.5156474113464355, |
|
"learning_rate": 5.948888888888889e-06, |
|
"loss": 0.3268, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.2716450216450217, |
|
"grad_norm": 5.252381324768066, |
|
"learning_rate": 5.893333333333334e-06, |
|
"loss": 0.3228, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.2851731601731602, |
|
"grad_norm": 5.7689313888549805, |
|
"learning_rate": 5.837777777777777e-06, |
|
"loss": 0.3304, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.2987012987012987, |
|
"grad_norm": 4.822956085205078, |
|
"learning_rate": 5.782222222222222e-06, |
|
"loss": 0.3066, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.3122294372294372, |
|
"grad_norm": 5.012087345123291, |
|
"learning_rate": 5.726666666666667e-06, |
|
"loss": 0.3323, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 1.3257575757575757, |
|
"grad_norm": 5.262439250946045, |
|
"learning_rate": 5.671111111111112e-06, |
|
"loss": 0.3256, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.3392857142857144, |
|
"grad_norm": 5.300339221954346, |
|
"learning_rate": 5.615555555555556e-06, |
|
"loss": 0.3287, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 1.3528138528138527, |
|
"grad_norm": 6.058621883392334, |
|
"learning_rate": 5.560000000000001e-06, |
|
"loss": 0.3283, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.3663419913419914, |
|
"grad_norm": 6.223220348358154, |
|
"learning_rate": 5.504444444444444e-06, |
|
"loss": 0.3288, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 1.37987012987013, |
|
"grad_norm": 5.865265369415283, |
|
"learning_rate": 5.448888888888889e-06, |
|
"loss": 0.3303, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.3933982683982684, |
|
"grad_norm": 4.715255260467529, |
|
"learning_rate": 5.393333333333334e-06, |
|
"loss": 0.3432, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 1.406926406926407, |
|
"grad_norm": 5.57729434967041, |
|
"learning_rate": 5.337777777777779e-06, |
|
"loss": 0.3174, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.4204545454545454, |
|
"grad_norm": 6.372653484344482, |
|
"learning_rate": 5.282222222222223e-06, |
|
"loss": 0.3251, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.433982683982684, |
|
"grad_norm": 6.7026848793029785, |
|
"learning_rate": 5.226666666666667e-06, |
|
"loss": 0.3258, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.4475108225108224, |
|
"grad_norm": 5.12203311920166, |
|
"learning_rate": 5.171111111111111e-06, |
|
"loss": 0.3217, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 1.4610389610389611, |
|
"grad_norm": 7.778601169586182, |
|
"learning_rate": 5.115555555555556e-06, |
|
"loss": 0.3182, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.4745670995670996, |
|
"grad_norm": 4.994805335998535, |
|
"learning_rate": 5.060000000000001e-06, |
|
"loss": 0.3142, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 1.4880952380952381, |
|
"grad_norm": 6.392801761627197, |
|
"learning_rate": 5.004444444444445e-06, |
|
"loss": 0.3289, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.5016233766233766, |
|
"grad_norm": 5.523842811584473, |
|
"learning_rate": 4.94888888888889e-06, |
|
"loss": 0.3248, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.5151515151515151, |
|
"grad_norm": 5.348546981811523, |
|
"learning_rate": 4.893333333333334e-06, |
|
"loss": 0.303, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.5286796536796536, |
|
"grad_norm": 5.714568614959717, |
|
"learning_rate": 4.837777777777778e-06, |
|
"loss": 0.3144, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.5422077922077921, |
|
"grad_norm": 5.544715404510498, |
|
"learning_rate": 4.7822222222222226e-06, |
|
"loss": 0.3183, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.5557359307359309, |
|
"grad_norm": 6.49782133102417, |
|
"learning_rate": 4.7266666666666674e-06, |
|
"loss": 0.2981, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.5692640692640691, |
|
"grad_norm": 5.356492042541504, |
|
"learning_rate": 4.6711111111111115e-06, |
|
"loss": 0.3159, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.5827922077922079, |
|
"grad_norm": 5.4491119384765625, |
|
"learning_rate": 4.6155555555555555e-06, |
|
"loss": 0.3333, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 1.5963203463203464, |
|
"grad_norm": 5.832214832305908, |
|
"learning_rate": 4.56e-06, |
|
"loss": 0.3141, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.6098484848484849, |
|
"grad_norm": 5.139626979827881, |
|
"learning_rate": 4.504444444444444e-06, |
|
"loss": 0.308, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 1.6233766233766234, |
|
"grad_norm": 5.6519999504089355, |
|
"learning_rate": 4.448888888888889e-06, |
|
"loss": 0.3189, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.6233766233766234, |
|
"eval_loss": 0.3910607397556305, |
|
"eval_runtime": 1770.6955, |
|
"eval_samples_per_second": 2.203, |
|
"eval_steps_per_second": 0.138, |
|
"eval_wer": 0.29134067420071474, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.6369047619047619, |
|
"grad_norm": 5.791579246520996, |
|
"learning_rate": 4.393333333333334e-06, |
|
"loss": 0.3122, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 1.6504329004329006, |
|
"grad_norm": 5.640200614929199, |
|
"learning_rate": 4.337777777777778e-06, |
|
"loss": 0.3062, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.6639610389610389, |
|
"grad_norm": 5.585713863372803, |
|
"learning_rate": 4.282222222222222e-06, |
|
"loss": 0.3208, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 1.6774891774891776, |
|
"grad_norm": 5.871087074279785, |
|
"learning_rate": 4.226666666666667e-06, |
|
"loss": 0.3071, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.6910173160173159, |
|
"grad_norm": 5.412327766418457, |
|
"learning_rate": 4.171111111111111e-06, |
|
"loss": 0.3167, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.7045454545454546, |
|
"grad_norm": 5.0698561668396, |
|
"learning_rate": 4.115555555555556e-06, |
|
"loss": 0.3231, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.718073593073593, |
|
"grad_norm": 5.63693904876709, |
|
"learning_rate": 4.060000000000001e-06, |
|
"loss": 0.3128, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 1.7316017316017316, |
|
"grad_norm": 5.766589164733887, |
|
"learning_rate": 4.004444444444445e-06, |
|
"loss": 0.3229, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.74512987012987, |
|
"grad_norm": 5.414788246154785, |
|
"learning_rate": 3.948888888888889e-06, |
|
"loss": 0.2917, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 1.7586580086580086, |
|
"grad_norm": 5.106072902679443, |
|
"learning_rate": 3.893333333333333e-06, |
|
"loss": 0.311, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.7721861471861473, |
|
"grad_norm": 4.694611549377441, |
|
"learning_rate": 3.837777777777778e-06, |
|
"loss": 0.3228, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 1.7857142857142856, |
|
"grad_norm": 6.422979354858398, |
|
"learning_rate": 3.782222222222223e-06, |
|
"loss": 0.314, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.7992424242424243, |
|
"grad_norm": 5.5537567138671875, |
|
"learning_rate": 3.726666666666667e-06, |
|
"loss": 0.303, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 1.8127705627705628, |
|
"grad_norm": 6.503033638000488, |
|
"learning_rate": 3.6711111111111113e-06, |
|
"loss": 0.336, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.8262987012987013, |
|
"grad_norm": 5.406898021697998, |
|
"learning_rate": 3.615555555555556e-06, |
|
"loss": 0.3031, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 1.8398268398268398, |
|
"grad_norm": 6.486941337585449, |
|
"learning_rate": 3.5600000000000002e-06, |
|
"loss": 0.3203, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.8533549783549783, |
|
"grad_norm": 7.027703285217285, |
|
"learning_rate": 3.5044444444444447e-06, |
|
"loss": 0.3159, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 1.866883116883117, |
|
"grad_norm": 5.475865364074707, |
|
"learning_rate": 3.4488888888888896e-06, |
|
"loss": 0.3239, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.8804112554112553, |
|
"grad_norm": 6.124994277954102, |
|
"learning_rate": 3.3933333333333336e-06, |
|
"loss": 0.2928, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 1.893939393939394, |
|
"grad_norm": 4.759301662445068, |
|
"learning_rate": 3.337777777777778e-06, |
|
"loss": 0.2862, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.9074675324675323, |
|
"grad_norm": 5.548280239105225, |
|
"learning_rate": 3.282222222222223e-06, |
|
"loss": 0.312, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 1.920995670995671, |
|
"grad_norm": 5.691162109375, |
|
"learning_rate": 3.226666666666667e-06, |
|
"loss": 0.3161, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.9345238095238095, |
|
"grad_norm": 6.089394569396973, |
|
"learning_rate": 3.1711111111111114e-06, |
|
"loss": 0.3028, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 1.948051948051948, |
|
"grad_norm": 5.725650310516357, |
|
"learning_rate": 3.1155555555555555e-06, |
|
"loss": 0.3058, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.9615800865800865, |
|
"grad_norm": 5.124326705932617, |
|
"learning_rate": 3.0600000000000003e-06, |
|
"loss": 0.2947, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 1.975108225108225, |
|
"grad_norm": 6.62967586517334, |
|
"learning_rate": 3.004444444444445e-06, |
|
"loss": 0.318, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.9886363636363638, |
|
"grad_norm": 6.150094985961914, |
|
"learning_rate": 2.948888888888889e-06, |
|
"loss": 0.3257, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 2.002164502164502, |
|
"grad_norm": 3.962730884552002, |
|
"learning_rate": 2.8933333333333337e-06, |
|
"loss": 0.29, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.0156926406926408, |
|
"grad_norm": 3.999758005142212, |
|
"learning_rate": 2.837777777777778e-06, |
|
"loss": 0.2127, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 2.029220779220779, |
|
"grad_norm": 4.3916015625, |
|
"learning_rate": 2.7822222222222222e-06, |
|
"loss": 0.2073, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 2.0427489177489178, |
|
"grad_norm": 4.647676944732666, |
|
"learning_rate": 2.726666666666667e-06, |
|
"loss": 0.2065, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 2.0562770562770565, |
|
"grad_norm": 5.331233501434326, |
|
"learning_rate": 2.6711111111111116e-06, |
|
"loss": 0.208, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.0698051948051948, |
|
"grad_norm": 4.8974995613098145, |
|
"learning_rate": 2.6155555555555556e-06, |
|
"loss": 0.2007, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 2.0833333333333335, |
|
"grad_norm": 4.972061634063721, |
|
"learning_rate": 2.56e-06, |
|
"loss": 0.2017, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.0968614718614718, |
|
"grad_norm": 5.035933494567871, |
|
"learning_rate": 2.504444444444445e-06, |
|
"loss": 0.2075, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 2.1103896103896105, |
|
"grad_norm": 6.620712757110596, |
|
"learning_rate": 2.448888888888889e-06, |
|
"loss": 0.2074, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.1239177489177488, |
|
"grad_norm": 7.161535739898682, |
|
"learning_rate": 2.3933333333333334e-06, |
|
"loss": 0.2075, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 2.1374458874458875, |
|
"grad_norm": 5.531479358673096, |
|
"learning_rate": 2.337777777777778e-06, |
|
"loss": 0.2088, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.150974025974026, |
|
"grad_norm": 4.983880043029785, |
|
"learning_rate": 2.2822222222222223e-06, |
|
"loss": 0.2103, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 2.1645021645021645, |
|
"grad_norm": 4.373054504394531, |
|
"learning_rate": 2.226666666666667e-06, |
|
"loss": 0.2051, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.1645021645021645, |
|
"eval_loss": 0.38633546233177185, |
|
"eval_runtime": 1768.38, |
|
"eval_samples_per_second": 2.206, |
|
"eval_steps_per_second": 0.138, |
|
"eval_wer": 0.27895296049454266, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.178030303030303, |
|
"grad_norm": 5.22249174118042, |
|
"learning_rate": 2.1711111111111113e-06, |
|
"loss": 0.1988, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 2.1915584415584415, |
|
"grad_norm": 3.8173792362213135, |
|
"learning_rate": 2.1155555555555557e-06, |
|
"loss": 0.2241, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.20508658008658, |
|
"grad_norm": 5.271940231323242, |
|
"learning_rate": 2.06e-06, |
|
"loss": 0.201, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 2.2186147186147185, |
|
"grad_norm": 4.359199523925781, |
|
"learning_rate": 2.0044444444444446e-06, |
|
"loss": 0.1946, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.232142857142857, |
|
"grad_norm": 4.675993919372559, |
|
"learning_rate": 1.948888888888889e-06, |
|
"loss": 0.2123, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 2.2456709956709955, |
|
"grad_norm": 4.090628147125244, |
|
"learning_rate": 1.8933333333333333e-06, |
|
"loss": 0.2045, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.259199134199134, |
|
"grad_norm": 3.5872035026550293, |
|
"learning_rate": 1.837777777777778e-06, |
|
"loss": 0.2042, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 2.2727272727272725, |
|
"grad_norm": 4.375498294830322, |
|
"learning_rate": 1.7822222222222225e-06, |
|
"loss": 0.2056, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.286255411255411, |
|
"grad_norm": 4.972550868988037, |
|
"learning_rate": 1.7266666666666667e-06, |
|
"loss": 0.2024, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 2.29978354978355, |
|
"grad_norm": 4.7940168380737305, |
|
"learning_rate": 1.6711111111111112e-06, |
|
"loss": 0.1996, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.313311688311688, |
|
"grad_norm": 4.399414539337158, |
|
"learning_rate": 1.6155555555555559e-06, |
|
"loss": 0.2101, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 2.326839826839827, |
|
"grad_norm": 5.292896747589111, |
|
"learning_rate": 1.56e-06, |
|
"loss": 0.1929, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.340367965367965, |
|
"grad_norm": 4.333370685577393, |
|
"learning_rate": 1.5044444444444446e-06, |
|
"loss": 0.208, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 2.353896103896104, |
|
"grad_norm": 4.69057035446167, |
|
"learning_rate": 1.4488888888888892e-06, |
|
"loss": 0.1905, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.367424242424242, |
|
"grad_norm": 4.56622838973999, |
|
"learning_rate": 1.3933333333333335e-06, |
|
"loss": 0.2051, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 2.380952380952381, |
|
"grad_norm": 4.605253219604492, |
|
"learning_rate": 1.337777777777778e-06, |
|
"loss": 0.1978, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.3944805194805197, |
|
"grad_norm": 5.727032661437988, |
|
"learning_rate": 1.2822222222222222e-06, |
|
"loss": 0.2002, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 2.408008658008658, |
|
"grad_norm": 5.882457256317139, |
|
"learning_rate": 1.2266666666666666e-06, |
|
"loss": 0.202, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 2.4215367965367967, |
|
"grad_norm": 4.464743614196777, |
|
"learning_rate": 1.171111111111111e-06, |
|
"loss": 0.2145, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 2.435064935064935, |
|
"grad_norm": 4.503987789154053, |
|
"learning_rate": 1.1155555555555558e-06, |
|
"loss": 0.2101, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.4485930735930737, |
|
"grad_norm": 5.735741138458252, |
|
"learning_rate": 1.06e-06, |
|
"loss": 0.1913, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 2.462121212121212, |
|
"grad_norm": 4.6319098472595215, |
|
"learning_rate": 1.0044444444444445e-06, |
|
"loss": 0.2001, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 2.4756493506493507, |
|
"grad_norm": 5.589540958404541, |
|
"learning_rate": 9.488888888888889e-07, |
|
"loss": 0.1981, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 2.4891774891774894, |
|
"grad_norm": 4.481135845184326, |
|
"learning_rate": 8.933333333333334e-07, |
|
"loss": 0.198, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.5027056277056277, |
|
"grad_norm": 6.087165355682373, |
|
"learning_rate": 8.37777777777778e-07, |
|
"loss": 0.2131, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 2.5162337662337664, |
|
"grad_norm": 4.635289669036865, |
|
"learning_rate": 7.822222222222223e-07, |
|
"loss": 0.2088, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.5297619047619047, |
|
"grad_norm": 4.698585510253906, |
|
"learning_rate": 7.266666666666668e-07, |
|
"loss": 0.2057, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 2.5432900432900434, |
|
"grad_norm": 4.562716960906982, |
|
"learning_rate": 6.711111111111111e-07, |
|
"loss": 0.2117, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.5568181818181817, |
|
"grad_norm": 5.381985187530518, |
|
"learning_rate": 6.155555555555556e-07, |
|
"loss": 0.1975, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 2.5703463203463204, |
|
"grad_norm": 5.667773723602295, |
|
"learning_rate": 5.6e-07, |
|
"loss": 0.2409, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.583874458874459, |
|
"grad_norm": 4.565330982208252, |
|
"learning_rate": 5.044444444444445e-07, |
|
"loss": 0.1915, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 2.5974025974025974, |
|
"grad_norm": 5.17742395401001, |
|
"learning_rate": 4.488888888888889e-07, |
|
"loss": 0.1973, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.6109307359307357, |
|
"grad_norm": 4.878474712371826, |
|
"learning_rate": 3.9333333333333336e-07, |
|
"loss": 0.2209, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 2.6244588744588744, |
|
"grad_norm": 5.2556328773498535, |
|
"learning_rate": 3.3777777777777777e-07, |
|
"loss": 0.204, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 2.637987012987013, |
|
"grad_norm": 3.8071792125701904, |
|
"learning_rate": 2.822222222222222e-07, |
|
"loss": 0.2052, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 2.6515151515151514, |
|
"grad_norm": 4.218277454376221, |
|
"learning_rate": 2.266666666666667e-07, |
|
"loss": 0.1989, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.66504329004329, |
|
"grad_norm": 5.260907173156738, |
|
"learning_rate": 1.7111111111111114e-07, |
|
"loss": 0.1915, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 2.678571428571429, |
|
"grad_norm": 4.497314453125, |
|
"learning_rate": 1.1555555555555556e-07, |
|
"loss": 0.1997, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 2.692099567099567, |
|
"grad_norm": 4.543353080749512, |
|
"learning_rate": 6.000000000000001e-08, |
|
"loss": 0.1866, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 2.7056277056277054, |
|
"grad_norm": 6.265724182128906, |
|
"learning_rate": 4.444444444444445e-09, |
|
"loss": 0.202, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.7056277056277054, |
|
"eval_loss": 0.38099274039268494, |
|
"eval_runtime": 1768.0451, |
|
"eval_samples_per_second": 2.206, |
|
"eval_steps_per_second": 0.138, |
|
"eval_wer": 0.27501690331304934, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.7056277056277054, |
|
"step": 5000, |
|
"total_flos": 5.434978041004032e+20, |
|
"train_loss": 0.39443905401229856, |
|
"train_runtime": 57517.0908, |
|
"train_samples_per_second": 2.782, |
|
"train_steps_per_second": 0.087 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 5000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.434978041004032e+20, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|