|
{ |
|
"best_metric": 10.058774675781, |
|
"best_model_checkpoint": "./model_out_trpro_more_more/checkpoint-13000", |
|
"epoch": 3.6020224407812718, |
|
"global_step": 13000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.487, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5e-06, |
|
"loss": 0.4472, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.4173, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1e-05, |
|
"loss": 0.4623, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.457, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.4775, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.4445, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4751, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.4633, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.4682, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.4807, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3e-05, |
|
"loss": 0.4882, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.4681, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.4918, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.4871, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4986, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.4859, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4761, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.4884, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5453, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_cer": 14.794288634766497, |
|
"eval_loss": 0.3284708261489868, |
|
"eval_runtime": 1225.6472, |
|
"eval_samples_per_second": 4.562, |
|
"eval_steps_per_second": 0.286, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.991379310344828e-05, |
|
"loss": 0.5147, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.982758620689655e-05, |
|
"loss": 0.514, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9741379310344836e-05, |
|
"loss": 0.5397, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9655172413793107e-05, |
|
"loss": 0.5269, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.9568965517241384e-05, |
|
"loss": 0.4992, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.9482758620689655e-05, |
|
"loss": 0.538, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.939655172413793e-05, |
|
"loss": 0.5123, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.931034482758621e-05, |
|
"loss": 0.5051, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.922413793103449e-05, |
|
"loss": 0.5342, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.913793103448276e-05, |
|
"loss": 0.5037, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.905172413793104e-05, |
|
"loss": 0.5492, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.896551724137931e-05, |
|
"loss": 0.5256, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.8879310344827586e-05, |
|
"loss": 0.531, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.8793103448275864e-05, |
|
"loss": 0.5245, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.870689655172414e-05, |
|
"loss": 0.4702, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.862068965517241e-05, |
|
"loss": 0.5404, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.853793103448276e-05, |
|
"loss": 0.4968, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.8451724137931036e-05, |
|
"loss": 0.4905, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.836551724137931e-05, |
|
"loss": 0.5281, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.827931034482759e-05, |
|
"loss": 0.5144, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_cer": 14.356672842266658, |
|
"eval_loss": 0.3405072093009949, |
|
"eval_runtime": 1158.9803, |
|
"eval_samples_per_second": 4.824, |
|
"eval_steps_per_second": 0.302, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.819310344827587e-05, |
|
"loss": 0.5357, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.810689655172414e-05, |
|
"loss": 0.5521, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.802068965517242e-05, |
|
"loss": 0.5413, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.793448275862069e-05, |
|
"loss": 0.4707, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.784827586206897e-05, |
|
"loss": 0.4771, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.7762068965517245e-05, |
|
"loss": 0.5055, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.767586206896552e-05, |
|
"loss": 0.5464, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.758965517241379e-05, |
|
"loss": 0.4839, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.750344827586207e-05, |
|
"loss": 0.5087, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.741724137931035e-05, |
|
"loss": 0.4728, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.7331034482758626e-05, |
|
"loss": 0.4457, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.72448275862069e-05, |
|
"loss": 0.5246, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.7158620689655175e-05, |
|
"loss": 0.4953, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.7072413793103446e-05, |
|
"loss": 0.5318, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.6986206896551724e-05, |
|
"loss": 0.5062, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.69e-05, |
|
"loss": 0.4979, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.681379310344828e-05, |
|
"loss": 0.5202, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.672758620689656e-05, |
|
"loss": 0.5267, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.664137931034483e-05, |
|
"loss": 0.4971, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.6555172413793106e-05, |
|
"loss": 0.49, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_cer": 14.058008049575161, |
|
"eval_loss": 0.3323359787464142, |
|
"eval_runtime": 1178.6215, |
|
"eval_samples_per_second": 4.744, |
|
"eval_steps_per_second": 0.297, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.6468965517241384e-05, |
|
"loss": 0.4821, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.638275862068966e-05, |
|
"loss": 0.4603, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.629655172413793e-05, |
|
"loss": 0.5213, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.621034482758621e-05, |
|
"loss": 0.4922, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.612413793103448e-05, |
|
"loss": 0.5202, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.603793103448276e-05, |
|
"loss": 0.5046, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.5951724137931036e-05, |
|
"loss": 0.4795, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.5865517241379314e-05, |
|
"loss": 0.447, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.5779310344827585e-05, |
|
"loss": 0.4752, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.569310344827586e-05, |
|
"loss": 0.515, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.560689655172414e-05, |
|
"loss": 0.4882, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.552068965517242e-05, |
|
"loss": 0.4721, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.5434482758620696e-05, |
|
"loss": 0.4542, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.534827586206897e-05, |
|
"loss": 0.5197, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.5262068965517245e-05, |
|
"loss": 0.4589, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.5175862068965516e-05, |
|
"loss": 0.4668, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.50896551724138e-05, |
|
"loss": 0.4824, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.500344827586207e-05, |
|
"loss": 0.4967, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.491724137931035e-05, |
|
"loss": 0.4913, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.483103448275862e-05, |
|
"loss": 0.461, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_cer": 13.056602568197789, |
|
"eval_loss": 0.31062883138656616, |
|
"eval_runtime": 1163.7705, |
|
"eval_samples_per_second": 4.804, |
|
"eval_steps_per_second": 0.301, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.47448275862069e-05, |
|
"loss": 0.4987, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.4658620689655175e-05, |
|
"loss": 0.5091, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.457241379310345e-05, |
|
"loss": 0.5096, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.4486206896551724e-05, |
|
"loss": 0.4716, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.44e-05, |
|
"loss": 0.4618, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.431379310344827e-05, |
|
"loss": 0.4588, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.422758620689656e-05, |
|
"loss": 0.5037, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.414137931034483e-05, |
|
"loss": 0.4818, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.4055172413793106e-05, |
|
"loss": 0.4802, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.3968965517241384e-05, |
|
"loss": 0.4889, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.3882758620689655e-05, |
|
"loss": 0.4914, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.379655172413793e-05, |
|
"loss": 0.4867, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.371034482758621e-05, |
|
"loss": 0.5006, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.362413793103449e-05, |
|
"loss": 0.4705, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.353793103448276e-05, |
|
"loss": 0.482, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.3451724137931037e-05, |
|
"loss": 0.4818, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.336551724137931e-05, |
|
"loss": 0.4478, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.327931034482759e-05, |
|
"loss": 0.4759, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.319310344827586e-05, |
|
"loss": 0.4518, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.310689655172414e-05, |
|
"loss": 0.4935, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_cer": 13.829617325752253, |
|
"eval_loss": 0.3001407980918884, |
|
"eval_runtime": 1213.7136, |
|
"eval_samples_per_second": 4.607, |
|
"eval_steps_per_second": 0.288, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.302068965517241e-05, |
|
"loss": 0.4943, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.293448275862069e-05, |
|
"loss": 0.4451, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.284827586206897e-05, |
|
"loss": 0.456, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.2762068965517245e-05, |
|
"loss": 0.4944, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.267586206896552e-05, |
|
"loss": 0.4523, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.2589655172413794e-05, |
|
"loss": 0.4865, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.250344827586207e-05, |
|
"loss": 0.4429, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.241724137931035e-05, |
|
"loss": 0.4705, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.233103448275863e-05, |
|
"loss": 0.437, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.22448275862069e-05, |
|
"loss": 0.4218, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.2158620689655176e-05, |
|
"loss": 0.5038, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.2072413793103447e-05, |
|
"loss": 0.4579, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.1986206896551724e-05, |
|
"loss": 0.4738, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.19e-05, |
|
"loss": 0.4642, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.181379310344828e-05, |
|
"loss": 0.4422, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.172758620689655e-05, |
|
"loss": 0.4212, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.164137931034483e-05, |
|
"loss": 0.4803, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.1555172413793106e-05, |
|
"loss": 0.4333, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.1468965517241384e-05, |
|
"loss": 0.4663, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1382758620689655e-05, |
|
"loss": 0.4836, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_cer": 13.110905257778061, |
|
"eval_loss": 0.29782944917678833, |
|
"eval_runtime": 1227.0769, |
|
"eval_samples_per_second": 4.556, |
|
"eval_steps_per_second": 0.285, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.129655172413793e-05, |
|
"loss": 0.482, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.121034482758621e-05, |
|
"loss": 0.4706, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.112413793103448e-05, |
|
"loss": 0.4658, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.1037931034482766e-05, |
|
"loss": 0.4624, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.095172413793104e-05, |
|
"loss": 0.4754, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.0865517241379315e-05, |
|
"loss": 0.4866, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.0779310344827586e-05, |
|
"loss": 0.4723, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.069310344827586e-05, |
|
"loss": 0.4707, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.060689655172414e-05, |
|
"loss": 0.3989, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.052068965517242e-05, |
|
"loss": 0.4714, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.043448275862069e-05, |
|
"loss": 0.4451, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.034827586206897e-05, |
|
"loss": 0.4785, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.026206896551724e-05, |
|
"loss": 0.4509, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.017586206896552e-05, |
|
"loss": 0.4523, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.0089655172413794e-05, |
|
"loss": 0.4264, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.000344827586207e-05, |
|
"loss": 0.4685, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.991724137931035e-05, |
|
"loss": 0.4632, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.983103448275862e-05, |
|
"loss": 0.4548, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.97448275862069e-05, |
|
"loss": 0.4306, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.9658620689655176e-05, |
|
"loss": 0.4782, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_cer": 12.711620775570179, |
|
"eval_loss": 0.2923573851585388, |
|
"eval_runtime": 1186.1965, |
|
"eval_samples_per_second": 4.713, |
|
"eval_steps_per_second": 0.295, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.9572413793103454e-05, |
|
"loss": 0.4219, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.9486206896551725e-05, |
|
"loss": 0.471, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.94e-05, |
|
"loss": 0.4687, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.931379310344827e-05, |
|
"loss": 0.4356, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.922758620689656e-05, |
|
"loss": 0.4144, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.914137931034483e-05, |
|
"loss": 0.3605, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.9055172413793106e-05, |
|
"loss": 0.3652, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.896896551724138e-05, |
|
"loss": 0.3725, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.8882758620689655e-05, |
|
"loss": 0.3481, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.879655172413793e-05, |
|
"loss": 0.3662, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.871034482758621e-05, |
|
"loss": 0.3835, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.862413793103448e-05, |
|
"loss": 0.3638, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.853793103448276e-05, |
|
"loss": 0.3615, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.845172413793104e-05, |
|
"loss": 0.3327, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.8365517241379315e-05, |
|
"loss": 0.3679, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.827931034482759e-05, |
|
"loss": 0.3549, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.8193103448275863e-05, |
|
"loss": 0.3728, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.810689655172414e-05, |
|
"loss": 0.3493, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.802068965517241e-05, |
|
"loss": 0.3509, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.793448275862069e-05, |
|
"loss": 0.3441, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_cer": 11.794863604420877, |
|
"eval_loss": 0.2874628007411957, |
|
"eval_runtime": 1164.5329, |
|
"eval_samples_per_second": 4.801, |
|
"eval_steps_per_second": 0.301, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.784827586206897e-05, |
|
"loss": 0.3513, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.7762068965517245e-05, |
|
"loss": 0.3442, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.7675862068965516e-05, |
|
"loss": 0.3183, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.7589655172413794e-05, |
|
"loss": 0.3674, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.750344827586207e-05, |
|
"loss": 0.3491, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.741724137931035e-05, |
|
"loss": 0.337, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.733103448275862e-05, |
|
"loss": 0.3711, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.72448275862069e-05, |
|
"loss": 0.3839, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.7158620689655176e-05, |
|
"loss": 0.3724, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.707241379310345e-05, |
|
"loss": 0.3444, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.698620689655173e-05, |
|
"loss": 0.3997, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.69e-05, |
|
"loss": 0.3742, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.681379310344828e-05, |
|
"loss": 0.3657, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.672758620689655e-05, |
|
"loss": 0.3762, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.664137931034483e-05, |
|
"loss": 0.3815, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.655517241379311e-05, |
|
"loss": 0.3628, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.6468965517241384e-05, |
|
"loss": 0.3533, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.6382758620689655e-05, |
|
"loss": 0.3857, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.629655172413793e-05, |
|
"loss": 0.3381, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.6210344827586204e-05, |
|
"loss": 0.3647, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_cer": 11.617581294320578, |
|
"eval_loss": 0.28388434648513794, |
|
"eval_runtime": 1169.2298, |
|
"eval_samples_per_second": 4.782, |
|
"eval_steps_per_second": 0.299, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.612413793103449e-05, |
|
"loss": 0.3935, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.603793103448276e-05, |
|
"loss": 0.3514, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.595172413793104e-05, |
|
"loss": 0.3925, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.586551724137931e-05, |
|
"loss": 0.355, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.5779310344827586e-05, |
|
"loss": 0.3539, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.5693103448275864e-05, |
|
"loss": 0.3583, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.560689655172414e-05, |
|
"loss": 0.3312, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.552068965517242e-05, |
|
"loss": 0.331, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.543448275862069e-05, |
|
"loss": 0.3703, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.534827586206897e-05, |
|
"loss": 0.3599, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.526206896551724e-05, |
|
"loss": 0.3779, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.517586206896552e-05, |
|
"loss": 0.3338, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.5089655172413794e-05, |
|
"loss": 0.3249, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.500344827586207e-05, |
|
"loss": 0.364, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.491724137931034e-05, |
|
"loss": 0.3755, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.483103448275862e-05, |
|
"loss": 0.353, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.47448275862069e-05, |
|
"loss": 0.3483, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.4658620689655176e-05, |
|
"loss": 0.3675, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3.457241379310345e-05, |
|
"loss": 0.3779, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 3.4486206896551725e-05, |
|
"loss": 0.3642, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_cer": 11.572861432313294, |
|
"eval_loss": 0.28437113761901855, |
|
"eval_runtime": 1173.2699, |
|
"eval_samples_per_second": 4.765, |
|
"eval_steps_per_second": 0.298, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 3.4399999999999996e-05, |
|
"loss": 0.3872, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3.431379310344828e-05, |
|
"loss": 0.392, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 3.422758620689656e-05, |
|
"loss": 0.3545, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 3.41448275862069e-05, |
|
"loss": 0.3627, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 3.4058620689655175e-05, |
|
"loss": 0.3362, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 3.397241379310345e-05, |
|
"loss": 0.3628, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 3.3886206896551724e-05, |
|
"loss": 0.3157, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 3.38e-05, |
|
"loss": 0.3649, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 3.371379310344828e-05, |
|
"loss": 0.3462, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 3.362758620689656e-05, |
|
"loss": 0.3833, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.354137931034483e-05, |
|
"loss": 0.3643, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3.3455172413793106e-05, |
|
"loss": 0.3788, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.336896551724138e-05, |
|
"loss": 0.3625, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.328275862068966e-05, |
|
"loss": 0.363, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 3.319655172413793e-05, |
|
"loss": 0.3714, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 3.311034482758621e-05, |
|
"loss": 0.3636, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 3.302413793103448e-05, |
|
"loss": 0.3349, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 3.293793103448276e-05, |
|
"loss": 0.3516, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 3.2851724137931036e-05, |
|
"loss": 0.3327, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 3.2765517241379314e-05, |
|
"loss": 0.3493, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_cer": 11.528141570306012, |
|
"eval_loss": 0.2879265248775482, |
|
"eval_runtime": 1178.4372, |
|
"eval_samples_per_second": 4.744, |
|
"eval_steps_per_second": 0.297, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 3.2679310344827585e-05, |
|
"loss": 0.3419, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 3.259310344827586e-05, |
|
"loss": 0.3876, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 3.250689655172414e-05, |
|
"loss": 0.3392, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 3.242068965517241e-05, |
|
"loss": 0.3781, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 3.2334482758620696e-05, |
|
"loss": 0.3616, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 3.224827586206897e-05, |
|
"loss": 0.3596, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 3.2162068965517245e-05, |
|
"loss": 0.3681, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 3.2075862068965516e-05, |
|
"loss": 0.3373, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 3.1989655172413794e-05, |
|
"loss": 0.3311, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 3.190344827586207e-05, |
|
"loss": 0.4004, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3.181724137931035e-05, |
|
"loss": 0.3731, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.173103448275862e-05, |
|
"loss": 0.3443, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.16448275862069e-05, |
|
"loss": 0.345, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 3.155862068965517e-05, |
|
"loss": 0.312, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 3.147241379310345e-05, |
|
"loss": 0.3474, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 3.1386206896551724e-05, |
|
"loss": 0.3162, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.13e-05, |
|
"loss": 0.3227, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.121379310344828e-05, |
|
"loss": 0.3482, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.112758620689655e-05, |
|
"loss": 0.3398, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.104137931034483e-05, |
|
"loss": 0.3466, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_cer": 11.44349326007794, |
|
"eval_loss": 0.28201985359191895, |
|
"eval_runtime": 1169.509, |
|
"eval_samples_per_second": 4.781, |
|
"eval_steps_per_second": 0.299, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.0955172413793106e-05, |
|
"loss": 0.3256, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.0868965517241384e-05, |
|
"loss": 0.3208, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.0782758620689655e-05, |
|
"loss": 0.3267, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.069655172413793e-05, |
|
"loss": 0.3675, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.061034482758621e-05, |
|
"loss": 0.3502, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.052413793103449e-05, |
|
"loss": 0.3586, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 3.043793103448276e-05, |
|
"loss": 0.3211, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 3.0351724137931037e-05, |
|
"loss": 0.3631, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 3.026551724137931e-05, |
|
"loss": 0.3764, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 3.017931034482759e-05, |
|
"loss": 0.3636, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3.009310344827586e-05, |
|
"loss": 0.3682, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.000689655172414e-05, |
|
"loss": 0.3484, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.9920689655172412e-05, |
|
"loss": 0.336, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.983448275862069e-05, |
|
"loss": 0.3457, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.974827586206897e-05, |
|
"loss": 0.3548, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.966206896551724e-05, |
|
"loss": 0.3455, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.957586206896552e-05, |
|
"loss": 0.3303, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.9489655172413794e-05, |
|
"loss": 0.3344, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.940344827586207e-05, |
|
"loss": 0.3517, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.9317241379310346e-05, |
|
"loss": 0.3507, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_cer": 11.012266019293426, |
|
"eval_loss": 0.27717769145965576, |
|
"eval_runtime": 1163.8916, |
|
"eval_samples_per_second": 4.804, |
|
"eval_steps_per_second": 0.301, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.9231034482758624e-05, |
|
"loss": 0.3283, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.9144827586206898e-05, |
|
"loss": 0.3808, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.9058620689655176e-05, |
|
"loss": 0.3473, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.8972413793103447e-05, |
|
"loss": 0.3502, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.8886206896551728e-05, |
|
"loss": 0.3507, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.88e-05, |
|
"loss": 0.3086, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.8713793103448276e-05, |
|
"loss": 0.3266, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.862758620689655e-05, |
|
"loss": 0.3236, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.854137931034483e-05, |
|
"loss": 0.3063, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.8455172413793106e-05, |
|
"loss": 0.3357, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.836896551724138e-05, |
|
"loss": 0.3366, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.828275862068966e-05, |
|
"loss": 0.3459, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.8196551724137933e-05, |
|
"loss": 0.3396, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.811034482758621e-05, |
|
"loss": 0.3388, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.8024137931034485e-05, |
|
"loss": 0.3657, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.7937931034482763e-05, |
|
"loss": 0.3604, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.7851724137931033e-05, |
|
"loss": 0.3467, |
|
"step": 6925 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 2.7765517241379315e-05, |
|
"loss": 0.3434, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 2.7679310344827586e-05, |
|
"loss": 0.3416, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 2.7593103448275863e-05, |
|
"loss": 0.3482, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_cer": 11.178368363891906, |
|
"eval_loss": 0.2766138017177582, |
|
"eval_runtime": 1182.8471, |
|
"eval_samples_per_second": 4.727, |
|
"eval_steps_per_second": 0.296, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 2.7506896551724138e-05, |
|
"loss": 0.3213, |
|
"step": 7025 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 2.7420689655172415e-05, |
|
"loss": 0.359, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 2.733448275862069e-05, |
|
"loss": 0.3506, |
|
"step": 7075 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.7248275862068968e-05, |
|
"loss": 0.3178, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.716206896551724e-05, |
|
"loss": 0.3371, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 2.707586206896552e-05, |
|
"loss": 0.3236, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 2.6989655172413797e-05, |
|
"loss": 0.3231, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 2.6903448275862068e-05, |
|
"loss": 0.3007, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2.6820689655172414e-05, |
|
"loss": 0.324, |
|
"step": 7225 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 2.6734482758620692e-05, |
|
"loss": 0.2574, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 2.6648275862068966e-05, |
|
"loss": 0.2346, |
|
"step": 7275 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 2.6562068965517244e-05, |
|
"loss": 0.2627, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 2.647586206896552e-05, |
|
"loss": 0.2614, |
|
"step": 7325 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 2.6389655172413796e-05, |
|
"loss": 0.2279, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 2.630344827586207e-05, |
|
"loss": 0.271, |
|
"step": 7375 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 2.621724137931035e-05, |
|
"loss": 0.2566, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 2.613103448275862e-05, |
|
"loss": 0.2471, |
|
"step": 7425 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 2.60448275862069e-05, |
|
"loss": 0.2438, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 2.595862068965517e-05, |
|
"loss": 0.2457, |
|
"step": 7475 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 2.587241379310345e-05, |
|
"loss": 0.232, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_cer": 10.926020571136522, |
|
"eval_loss": 0.2784821093082428, |
|
"eval_runtime": 1168.6896, |
|
"eval_samples_per_second": 4.784, |
|
"eval_steps_per_second": 0.299, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 2.5786206896551724e-05, |
|
"loss": 0.256, |
|
"step": 7525 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 2.57e-05, |
|
"loss": 0.2383, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 2.5613793103448276e-05, |
|
"loss": 0.2516, |
|
"step": 7575 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 2.5527586206896553e-05, |
|
"loss": 0.229, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 2.5441379310344828e-05, |
|
"loss": 0.242, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 2.5355172413793105e-05, |
|
"loss": 0.243, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.5268965517241383e-05, |
|
"loss": 0.2476, |
|
"step": 7675 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.5182758620689658e-05, |
|
"loss": 0.2351, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 2.5096551724137935e-05, |
|
"loss": 0.2526, |
|
"step": 7725 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 2.5010344827586206e-05, |
|
"loss": 0.2424, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 2.4924137931034484e-05, |
|
"loss": 0.264, |
|
"step": 7775 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 2.483793103448276e-05, |
|
"loss": 0.244, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 2.4751724137931036e-05, |
|
"loss": 0.2588, |
|
"step": 7825 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 2.4665517241379314e-05, |
|
"loss": 0.2418, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 2.4579310344827588e-05, |
|
"loss": 0.2556, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.4493103448275866e-05, |
|
"loss": 0.2253, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.440689655172414e-05, |
|
"loss": 0.2629, |
|
"step": 7925 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.4320689655172415e-05, |
|
"loss": 0.2512, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 2.4234482758620692e-05, |
|
"loss": 0.2186, |
|
"step": 7975 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 2.4148275862068967e-05, |
|
"loss": 0.2539, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_cer": 10.593815881939564, |
|
"eval_loss": 0.2776859700679779, |
|
"eval_runtime": 1165.9559, |
|
"eval_samples_per_second": 4.795, |
|
"eval_steps_per_second": 0.3, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 2.406206896551724e-05, |
|
"loss": 0.2433, |
|
"step": 8025 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 2.397586206896552e-05, |
|
"loss": 0.2383, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 2.3889655172413793e-05, |
|
"loss": 0.2454, |
|
"step": 8075 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 2.380344827586207e-05, |
|
"loss": 0.2596, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 2.3717241379310345e-05, |
|
"loss": 0.2479, |
|
"step": 8125 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 2.363103448275862e-05, |
|
"loss": 0.2519, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 2.3544827586206897e-05, |
|
"loss": 0.2394, |
|
"step": 8175 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 2.345862068965517e-05, |
|
"loss": 0.2336, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.337241379310345e-05, |
|
"loss": 0.2549, |
|
"step": 8225 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.3286206896551727e-05, |
|
"loss": 0.2784, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.32e-05, |
|
"loss": 0.2365, |
|
"step": 8275 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.311379310344828e-05, |
|
"loss": 0.2445, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.3027586206896554e-05, |
|
"loss": 0.2411, |
|
"step": 8325 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.2941379310344828e-05, |
|
"loss": 0.257, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.2855172413793106e-05, |
|
"loss": 0.2196, |
|
"step": 8375 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.276896551724138e-05, |
|
"loss": 0.2583, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.2682758620689658e-05, |
|
"loss": 0.2522, |
|
"step": 8425 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.2596551724137932e-05, |
|
"loss": 0.2712, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.2510344827586206e-05, |
|
"loss": 0.2624, |
|
"step": 8475 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.2424137931034484e-05, |
|
"loss": 0.2377, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_cer": 10.659298537021657, |
|
"eval_loss": 0.2783927917480469, |
|
"eval_runtime": 1180.6736, |
|
"eval_samples_per_second": 4.735, |
|
"eval_steps_per_second": 0.296, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.233793103448276e-05, |
|
"loss": 0.2526, |
|
"step": 8525 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.2251724137931036e-05, |
|
"loss": 0.2408, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.216551724137931e-05, |
|
"loss": 0.2778, |
|
"step": 8575 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.2079310344827585e-05, |
|
"loss": 0.253, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.1993103448275863e-05, |
|
"loss": 0.2416, |
|
"step": 8625 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.190689655172414e-05, |
|
"loss": 0.2477, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.1820689655172415e-05, |
|
"loss": 0.2453, |
|
"step": 8675 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 2.1734482758620693e-05, |
|
"loss": 0.2463, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 2.1648275862068967e-05, |
|
"loss": 0.2409, |
|
"step": 8725 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 2.1562068965517245e-05, |
|
"loss": 0.2305, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 2.147586206896552e-05, |
|
"loss": 0.2342, |
|
"step": 8775 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 2.1389655172413793e-05, |
|
"loss": 0.2551, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 2.130344827586207e-05, |
|
"loss": 0.2396, |
|
"step": 8825 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 2.1217241379310345e-05, |
|
"loss": 0.268, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 2.1131034482758623e-05, |
|
"loss": 0.2384, |
|
"step": 8875 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 2.1044827586206898e-05, |
|
"loss": 0.2418, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 2.0958620689655172e-05, |
|
"loss": 0.2414, |
|
"step": 8925 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 2.087241379310345e-05, |
|
"loss": 0.2232, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 2.0786206896551724e-05, |
|
"loss": 0.248, |
|
"step": 8975 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 2.07e-05, |
|
"loss": 0.2384, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_cer": 10.659298537021657, |
|
"eval_loss": 0.2808603048324585, |
|
"eval_runtime": 1165.5072, |
|
"eval_samples_per_second": 4.797, |
|
"eval_steps_per_second": 0.3, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 2.0613793103448276e-05, |
|
"loss": 0.2448, |
|
"step": 9025 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 2.0527586206896554e-05, |
|
"loss": 0.2268, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 2.0441379310344828e-05, |
|
"loss": 0.247, |
|
"step": 9075 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 2.0355172413793106e-05, |
|
"loss": 0.2345, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 2.026896551724138e-05, |
|
"loss": 0.2117, |
|
"step": 9125 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.0182758620689658e-05, |
|
"loss": 0.2265, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.0096551724137932e-05, |
|
"loss": 0.2464, |
|
"step": 9175 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.0010344827586207e-05, |
|
"loss": 0.2741, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.9924137931034484e-05, |
|
"loss": 0.2397, |
|
"step": 9225 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.983793103448276e-05, |
|
"loss": 0.2357, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.9751724137931037e-05, |
|
"loss": 0.2435, |
|
"step": 9275 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 1.966551724137931e-05, |
|
"loss": 0.2571, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 1.9579310344827585e-05, |
|
"loss": 0.2675, |
|
"step": 9325 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 1.9493103448275863e-05, |
|
"loss": 0.2476, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.9406896551724137e-05, |
|
"loss": 0.2412, |
|
"step": 9375 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.9320689655172415e-05, |
|
"loss": 0.2502, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.923448275862069e-05, |
|
"loss": 0.2327, |
|
"step": 9425 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.9148275862068964e-05, |
|
"loss": 0.2198, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.9062068965517245e-05, |
|
"loss": 0.2614, |
|
"step": 9475 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.897586206896552e-05, |
|
"loss": 0.2495, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_cer": 10.632147192231521, |
|
"eval_loss": 0.281376451253891, |
|
"eval_runtime": 1164.8408, |
|
"eval_samples_per_second": 4.8, |
|
"eval_steps_per_second": 0.3, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.8889655172413794e-05, |
|
"loss": 0.2442, |
|
"step": 9525 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.880344827586207e-05, |
|
"loss": 0.2369, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.8717241379310346e-05, |
|
"loss": 0.2186, |
|
"step": 9575 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.8631034482758623e-05, |
|
"loss": 0.234, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.8544827586206898e-05, |
|
"loss": 0.2448, |
|
"step": 9625 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.8458620689655172e-05, |
|
"loss": 0.2449, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.837241379310345e-05, |
|
"loss": 0.2322, |
|
"step": 9675 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.8286206896551724e-05, |
|
"loss": 0.2489, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.8200000000000002e-05, |
|
"loss": 0.2569, |
|
"step": 9725 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.8113793103448276e-05, |
|
"loss": 0.2331, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.802758620689655e-05, |
|
"loss": 0.2647, |
|
"step": 9775 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.794137931034483e-05, |
|
"loss": 0.2426, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.7855172413793103e-05, |
|
"loss": 0.2341, |
|
"step": 9825 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.776896551724138e-05, |
|
"loss": 0.24, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 1.7682758620689658e-05, |
|
"loss": 0.249, |
|
"step": 9875 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 1.7596551724137933e-05, |
|
"loss": 0.2217, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.751034482758621e-05, |
|
"loss": 0.2467, |
|
"step": 9925 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 1.7424137931034485e-05, |
|
"loss": 0.2529, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 1.733793103448276e-05, |
|
"loss": 0.2578, |
|
"step": 9975 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 1.7251724137931037e-05, |
|
"loss": 0.2312, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_cer": 10.60020443365489, |
|
"eval_loss": 0.28221753239631653, |
|
"eval_runtime": 1168.5035, |
|
"eval_samples_per_second": 4.785, |
|
"eval_steps_per_second": 0.3, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 1.716551724137931e-05, |
|
"loss": 0.2284, |
|
"step": 10025 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 1.7079310344827585e-05, |
|
"loss": 0.2348, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 1.6993103448275863e-05, |
|
"loss": 0.2244, |
|
"step": 10075 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.6906896551724138e-05, |
|
"loss": 0.2522, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.6820689655172415e-05, |
|
"loss": 0.2476, |
|
"step": 10125 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.673448275862069e-05, |
|
"loss": 0.2279, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.6648275862068964e-05, |
|
"loss": 0.2317, |
|
"step": 10175 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.6562068965517242e-05, |
|
"loss": 0.2305, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.6475862068965516e-05, |
|
"loss": 0.2364, |
|
"step": 10225 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.6389655172413794e-05, |
|
"loss": 0.2572, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.630344827586207e-05, |
|
"loss": 0.2065, |
|
"step": 10275 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.6217241379310346e-05, |
|
"loss": 0.2398, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.6131034482758624e-05, |
|
"loss": 0.2526, |
|
"step": 10325 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.6044827586206898e-05, |
|
"loss": 0.2263, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.5958620689655172e-05, |
|
"loss": 0.2527, |
|
"step": 10375 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.587241379310345e-05, |
|
"loss": 0.2401, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.5786206896551724e-05, |
|
"loss": 0.2221, |
|
"step": 10425 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.5700000000000002e-05, |
|
"loss": 0.2251, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.5613793103448276e-05, |
|
"loss": 0.247, |
|
"step": 10475 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.552758620689655e-05, |
|
"loss": 0.2264, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_cer": 10.73436401967674, |
|
"eval_loss": 0.2812344431877136, |
|
"eval_runtime": 1186.9835, |
|
"eval_samples_per_second": 4.71, |
|
"eval_steps_per_second": 0.295, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.544137931034483e-05, |
|
"loss": 0.2459, |
|
"step": 10525 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.5355172413793103e-05, |
|
"loss": 0.2541, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.526896551724138e-05, |
|
"loss": 0.2265, |
|
"step": 10575 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.5182758620689655e-05, |
|
"loss": 0.2329, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.5096551724137931e-05, |
|
"loss": 0.2245, |
|
"step": 10625 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.5010344827586207e-05, |
|
"loss": 0.265, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.4924137931034485e-05, |
|
"loss": 0.2423, |
|
"step": 10675 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.4837931034482761e-05, |
|
"loss": 0.2319, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.4751724137931037e-05, |
|
"loss": 0.2286, |
|
"step": 10725 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.4665517241379311e-05, |
|
"loss": 0.2352, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.4579310344827587e-05, |
|
"loss": 0.2635, |
|
"step": 10775 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.4493103448275863e-05, |
|
"loss": 0.2269, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.440689655172414e-05, |
|
"loss": 0.2296, |
|
"step": 10825 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.4324137931034484e-05, |
|
"loss": 0.1896, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.423793103448276e-05, |
|
"loss": 0.1824, |
|
"step": 10875 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.4151724137931036e-05, |
|
"loss": 0.1621, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.4065517241379312e-05, |
|
"loss": 0.1497, |
|
"step": 10925 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.3979310344827586e-05, |
|
"loss": 0.1597, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.3893103448275862e-05, |
|
"loss": 0.1642, |
|
"step": 10975 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.3806896551724138e-05, |
|
"loss": 0.1468, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"eval_cer": 10.796652398901168, |
|
"eval_loss": 0.2832615077495575, |
|
"eval_runtime": 1183.0471, |
|
"eval_samples_per_second": 4.726, |
|
"eval_steps_per_second": 0.296, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.3720689655172414e-05, |
|
"loss": 0.1427, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.363448275862069e-05, |
|
"loss": 0.17, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.3548275862068965e-05, |
|
"loss": 0.1585, |
|
"step": 11075 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.3462068965517241e-05, |
|
"loss": 0.1727, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.3375862068965517e-05, |
|
"loss": 0.1654, |
|
"step": 11125 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.3289655172413793e-05, |
|
"loss": 0.1517, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.320344827586207e-05, |
|
"loss": 0.1536, |
|
"step": 11175 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.3117241379310347e-05, |
|
"loss": 0.1692, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 1.3031034482758623e-05, |
|
"loss": 0.1673, |
|
"step": 11225 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.2944827586206897e-05, |
|
"loss": 0.1554, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.2858620689655173e-05, |
|
"loss": 0.1616, |
|
"step": 11275 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 1.277241379310345e-05, |
|
"loss": 0.1553, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 1.2686206896551725e-05, |
|
"loss": 0.1692, |
|
"step": 11325 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 1.2600000000000001e-05, |
|
"loss": 0.1582, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 1.2513793103448276e-05, |
|
"loss": 0.1634, |
|
"step": 11375 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 1.2427586206896552e-05, |
|
"loss": 0.1584, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 1.2341379310344828e-05, |
|
"loss": 0.1678, |
|
"step": 11425 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 1.2255172413793104e-05, |
|
"loss": 0.1767, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 1.216896551724138e-05, |
|
"loss": 0.1561, |
|
"step": 11475 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 1.2082758620689656e-05, |
|
"loss": 0.1557, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"eval_cer": 10.533124640643967, |
|
"eval_loss": 0.28545621037483215, |
|
"eval_runtime": 1176.7108, |
|
"eval_samples_per_second": 4.751, |
|
"eval_steps_per_second": 0.297, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 1.1996551724137932e-05, |
|
"loss": 0.172, |
|
"step": 11525 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.1910344827586208e-05, |
|
"loss": 0.1571, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 1.1824137931034484e-05, |
|
"loss": 0.1605, |
|
"step": 11575 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 1.1737931034482758e-05, |
|
"loss": 0.1612, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 1.1651724137931034e-05, |
|
"loss": 0.1615, |
|
"step": 11625 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 1.156551724137931e-05, |
|
"loss": 0.1627, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 1.1479310344827588e-05, |
|
"loss": 0.1481, |
|
"step": 11675 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 1.1393103448275863e-05, |
|
"loss": 0.1532, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 1.1306896551724139e-05, |
|
"loss": 0.1358, |
|
"step": 11725 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 1.1220689655172415e-05, |
|
"loss": 0.1619, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 1.113448275862069e-05, |
|
"loss": 0.1721, |
|
"step": 11775 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 1.1048275862068965e-05, |
|
"loss": 0.1662, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 1.0962068965517241e-05, |
|
"loss": 0.1511, |
|
"step": 11825 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 1.0875862068965517e-05, |
|
"loss": 0.1554, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 1.0789655172413795e-05, |
|
"loss": 0.1767, |
|
"step": 11875 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 1.070344827586207e-05, |
|
"loss": 0.1613, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 1.0617241379310345e-05, |
|
"loss": 0.1752, |
|
"step": 11925 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 1.0531034482758621e-05, |
|
"loss": 0.1493, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 1.0444827586206897e-05, |
|
"loss": 0.1927, |
|
"step": 11975 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 1.0358620689655173e-05, |
|
"loss": 0.1639, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"eval_cer": 10.167380054941546, |
|
"eval_loss": 0.28585749864578247, |
|
"eval_runtime": 1168.3755, |
|
"eval_samples_per_second": 4.785, |
|
"eval_steps_per_second": 0.3, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 1.0272413793103448e-05, |
|
"loss": 0.1708, |
|
"step": 12025 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 1.0186206896551724e-05, |
|
"loss": 0.1692, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 1.0100000000000002e-05, |
|
"loss": 0.1754, |
|
"step": 12075 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 1.0013793103448278e-05, |
|
"loss": 0.1695, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 9.927586206896552e-06, |
|
"loss": 0.1695, |
|
"step": 12125 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 9.841379310344828e-06, |
|
"loss": 0.1718, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 9.755172413793104e-06, |
|
"loss": 0.151, |
|
"step": 12175 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 9.66896551724138e-06, |
|
"loss": 0.1672, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 9.582758620689654e-06, |
|
"loss": 0.1665, |
|
"step": 12225 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 9.49655172413793e-06, |
|
"loss": 0.1567, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 9.410344827586208e-06, |
|
"loss": 0.1691, |
|
"step": 12275 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 9.324137931034484e-06, |
|
"loss": 0.1648, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 9.237931034482759e-06, |
|
"loss": 0.1576, |
|
"step": 12325 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 9.151724137931035e-06, |
|
"loss": 0.1689, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 9.06551724137931e-06, |
|
"loss": 0.159, |
|
"step": 12375 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 8.979310344827587e-06, |
|
"loss": 0.1682, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 8.893103448275863e-06, |
|
"loss": 0.1729, |
|
"step": 12425 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 8.806896551724137e-06, |
|
"loss": 0.168, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 8.720689655172415e-06, |
|
"loss": 0.1704, |
|
"step": 12475 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 8.634482758620691e-06, |
|
"loss": 0.1603, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"eval_cer": 10.212099916948828, |
|
"eval_loss": 0.2885717749595642, |
|
"eval_runtime": 1168.0562, |
|
"eval_samples_per_second": 4.787, |
|
"eval_steps_per_second": 0.3, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 8.548275862068967e-06, |
|
"loss": 0.1564, |
|
"step": 12525 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 8.462068965517241e-06, |
|
"loss": 0.1511, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 8.375862068965517e-06, |
|
"loss": 0.1575, |
|
"step": 12575 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 8.289655172413793e-06, |
|
"loss": 0.1574, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 8.20344827586207e-06, |
|
"loss": 0.1673, |
|
"step": 12625 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 8.117241379310346e-06, |
|
"loss": 0.1605, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 8.031034482758622e-06, |
|
"loss": 0.1562, |
|
"step": 12675 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 7.944827586206898e-06, |
|
"loss": 0.1629, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 7.858620689655174e-06, |
|
"loss": 0.1629, |
|
"step": 12725 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 7.772413793103448e-06, |
|
"loss": 0.1849, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 7.686206896551724e-06, |
|
"loss": 0.1501, |
|
"step": 12775 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 7.6e-06, |
|
"loss": 0.1563, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 7.513793103448276e-06, |
|
"loss": 0.1596, |
|
"step": 12825 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 7.427586206896551e-06, |
|
"loss": 0.156, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 7.341379310344828e-06, |
|
"loss": 0.1655, |
|
"step": 12875 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 7.255172413793104e-06, |
|
"loss": 0.1642, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 7.16896551724138e-06, |
|
"loss": 0.1676, |
|
"step": 12925 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 7.0827586206896555e-06, |
|
"loss": 0.149, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 6.9965517241379315e-06, |
|
"loss": 0.1734, |
|
"step": 12975 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 6.910344827586207e-06, |
|
"loss": 0.1599, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"eval_cer": 10.058774675781, |
|
"eval_loss": 0.2872118651866913, |
|
"eval_runtime": 1162.7195, |
|
"eval_samples_per_second": 4.809, |
|
"eval_steps_per_second": 0.301, |
|
"step": 13000 |
|
} |
|
], |
|
"max_steps": 15000, |
|
"num_train_epochs": 5, |
|
"total_flos": 3.7516448344080384e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|