whisper-medium-da / trainer_state.json
jstoone's picture
End of training
df6490a
{
"best_metric": 13.708574434508153,
"best_model_checkpoint": "./checkpoint-10000",
"epoch": 31.446540880503143,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08,
"learning_rate": 3.6799999999999996e-07,
"loss": 1.2045,
"step": 25
},
{
"epoch": 0.16,
"learning_rate": 7.68e-07,
"loss": 0.997,
"step": 50
},
{
"epoch": 0.24,
"learning_rate": 1.1679999999999999e-06,
"loss": 0.6901,
"step": 75
},
{
"epoch": 0.31,
"learning_rate": 1.568e-06,
"loss": 0.4398,
"step": 100
},
{
"epoch": 0.39,
"learning_rate": 1.968e-06,
"loss": 0.3942,
"step": 125
},
{
"epoch": 0.47,
"learning_rate": 2.3679999999999996e-06,
"loss": 0.3263,
"step": 150
},
{
"epoch": 0.55,
"learning_rate": 2.7679999999999996e-06,
"loss": 0.3056,
"step": 175
},
{
"epoch": 0.63,
"learning_rate": 3.168e-06,
"loss": 0.2628,
"step": 200
},
{
"epoch": 0.71,
"learning_rate": 3.568e-06,
"loss": 0.2526,
"step": 225
},
{
"epoch": 0.79,
"learning_rate": 3.968e-06,
"loss": 0.257,
"step": 250
},
{
"epoch": 0.86,
"learning_rate": 4.368e-06,
"loss": 0.2296,
"step": 275
},
{
"epoch": 0.94,
"learning_rate": 4.768e-06,
"loss": 0.231,
"step": 300
},
{
"epoch": 1.02,
"learning_rate": 5.168e-06,
"loss": 0.194,
"step": 325
},
{
"epoch": 1.1,
"learning_rate": 5.567999999999999e-06,
"loss": 0.1423,
"step": 350
},
{
"epoch": 1.18,
"learning_rate": 5.9679999999999994e-06,
"loss": 0.1473,
"step": 375
},
{
"epoch": 1.26,
"learning_rate": 6.368e-06,
"loss": 0.1454,
"step": 400
},
{
"epoch": 1.34,
"learning_rate": 6.767999999999999e-06,
"loss": 0.143,
"step": 425
},
{
"epoch": 1.42,
"learning_rate": 7.168e-06,
"loss": 0.141,
"step": 450
},
{
"epoch": 1.49,
"learning_rate": 7.567999999999999e-06,
"loss": 0.1471,
"step": 475
},
{
"epoch": 1.57,
"learning_rate": 7.967999999999999e-06,
"loss": 0.1336,
"step": 500
},
{
"epoch": 1.65,
"learning_rate": 7.980631578947368e-06,
"loss": 0.1264,
"step": 525
},
{
"epoch": 1.73,
"learning_rate": 7.95957894736842e-06,
"loss": 0.1324,
"step": 550
},
{
"epoch": 1.81,
"learning_rate": 7.938526315789473e-06,
"loss": 0.1179,
"step": 575
},
{
"epoch": 1.89,
"learning_rate": 7.917473684210526e-06,
"loss": 0.1243,
"step": 600
},
{
"epoch": 1.97,
"learning_rate": 7.896421052631578e-06,
"loss": 0.1209,
"step": 625
},
{
"epoch": 2.04,
"learning_rate": 7.875368421052631e-06,
"loss": 0.0874,
"step": 650
},
{
"epoch": 2.12,
"learning_rate": 7.854315789473684e-06,
"loss": 0.0612,
"step": 675
},
{
"epoch": 2.2,
"learning_rate": 7.833263157894736e-06,
"loss": 0.0569,
"step": 700
},
{
"epoch": 2.28,
"learning_rate": 7.812210526315789e-06,
"loss": 0.0566,
"step": 725
},
{
"epoch": 2.36,
"learning_rate": 7.791157894736842e-06,
"loss": 0.0522,
"step": 750
},
{
"epoch": 2.44,
"learning_rate": 7.770105263157894e-06,
"loss": 0.0558,
"step": 775
},
{
"epoch": 2.52,
"learning_rate": 7.749052631578947e-06,
"loss": 0.0547,
"step": 800
},
{
"epoch": 2.59,
"learning_rate": 7.728e-06,
"loss": 0.0568,
"step": 825
},
{
"epoch": 2.67,
"learning_rate": 7.706947368421052e-06,
"loss": 0.0569,
"step": 850
},
{
"epoch": 2.75,
"learning_rate": 7.685894736842105e-06,
"loss": 0.0573,
"step": 875
},
{
"epoch": 2.83,
"learning_rate": 7.664842105263157e-06,
"loss": 0.0538,
"step": 900
},
{
"epoch": 2.91,
"learning_rate": 7.64378947368421e-06,
"loss": 0.054,
"step": 925
},
{
"epoch": 2.99,
"learning_rate": 7.622736842105263e-06,
"loss": 0.0553,
"step": 950
},
{
"epoch": 3.07,
"learning_rate": 7.6016842105263155e-06,
"loss": 0.0316,
"step": 975
},
{
"epoch": 3.14,
"learning_rate": 7.580631578947368e-06,
"loss": 0.0265,
"step": 1000
},
{
"epoch": 3.14,
"eval_loss": 0.3689558207988739,
"eval_runtime": 1285.0181,
"eval_samples_per_second": 1.66,
"eval_steps_per_second": 0.208,
"eval_wer": 14.760652288269332,
"step": 1000
},
{
"epoch": 3.22,
"learning_rate": 7.559578947368421e-06,
"loss": 0.0247,
"step": 1025
},
{
"epoch": 3.3,
"learning_rate": 7.5385263157894734e-06,
"loss": 0.0285,
"step": 1050
},
{
"epoch": 3.38,
"learning_rate": 7.517473684210526e-06,
"loss": 0.0266,
"step": 1075
},
{
"epoch": 3.46,
"learning_rate": 7.496421052631579e-06,
"loss": 0.0242,
"step": 1100
},
{
"epoch": 3.54,
"learning_rate": 7.475368421052631e-06,
"loss": 0.0286,
"step": 1125
},
{
"epoch": 3.62,
"learning_rate": 7.454315789473684e-06,
"loss": 0.0254,
"step": 1150
},
{
"epoch": 3.69,
"learning_rate": 7.433263157894736e-06,
"loss": 0.0303,
"step": 1175
},
{
"epoch": 3.77,
"learning_rate": 7.4122105263157885e-06,
"loss": 0.0281,
"step": 1200
},
{
"epoch": 3.85,
"learning_rate": 7.391157894736841e-06,
"loss": 0.0267,
"step": 1225
},
{
"epoch": 3.93,
"learning_rate": 7.370105263157895e-06,
"loss": 0.0272,
"step": 1250
},
{
"epoch": 4.01,
"learning_rate": 7.349052631578947e-06,
"loss": 0.0233,
"step": 1275
},
{
"epoch": 4.09,
"learning_rate": 7.328e-06,
"loss": 0.0133,
"step": 1300
},
{
"epoch": 4.17,
"learning_rate": 7.306947368421053e-06,
"loss": 0.0148,
"step": 1325
},
{
"epoch": 4.25,
"learning_rate": 7.285894736842105e-06,
"loss": 0.0153,
"step": 1350
},
{
"epoch": 4.32,
"learning_rate": 7.264842105263158e-06,
"loss": 0.016,
"step": 1375
},
{
"epoch": 4.4,
"learning_rate": 7.243789473684211e-06,
"loss": 0.0146,
"step": 1400
},
{
"epoch": 4.48,
"learning_rate": 7.222736842105262e-06,
"loss": 0.0147,
"step": 1425
},
{
"epoch": 4.56,
"learning_rate": 7.201684210526315e-06,
"loss": 0.0151,
"step": 1450
},
{
"epoch": 4.64,
"learning_rate": 7.180631578947368e-06,
"loss": 0.0135,
"step": 1475
},
{
"epoch": 4.72,
"learning_rate": 7.15957894736842e-06,
"loss": 0.0159,
"step": 1500
},
{
"epoch": 4.8,
"learning_rate": 7.138526315789473e-06,
"loss": 0.0166,
"step": 1525
},
{
"epoch": 4.87,
"learning_rate": 7.117473684210526e-06,
"loss": 0.0118,
"step": 1550
},
{
"epoch": 4.95,
"learning_rate": 7.096421052631578e-06,
"loss": 0.015,
"step": 1575
},
{
"epoch": 5.03,
"learning_rate": 7.075368421052632e-06,
"loss": 0.0121,
"step": 1600
},
{
"epoch": 5.11,
"learning_rate": 7.0543157894736845e-06,
"loss": 0.0098,
"step": 1625
},
{
"epoch": 5.19,
"learning_rate": 7.033263157894737e-06,
"loss": 0.0083,
"step": 1650
},
{
"epoch": 5.27,
"learning_rate": 7.012210526315789e-06,
"loss": 0.0092,
"step": 1675
},
{
"epoch": 5.35,
"learning_rate": 6.991157894736842e-06,
"loss": 0.0076,
"step": 1700
},
{
"epoch": 5.42,
"learning_rate": 6.970105263157894e-06,
"loss": 0.009,
"step": 1725
},
{
"epoch": 5.5,
"learning_rate": 6.949052631578947e-06,
"loss": 0.0098,
"step": 1750
},
{
"epoch": 5.58,
"learning_rate": 6.9279999999999996e-06,
"loss": 0.0069,
"step": 1775
},
{
"epoch": 5.66,
"learning_rate": 6.906947368421052e-06,
"loss": 0.0095,
"step": 1800
},
{
"epoch": 5.74,
"learning_rate": 6.885894736842105e-06,
"loss": 0.009,
"step": 1825
},
{
"epoch": 5.82,
"learning_rate": 6.8648421052631575e-06,
"loss": 0.0103,
"step": 1850
},
{
"epoch": 5.9,
"learning_rate": 6.84378947368421e-06,
"loss": 0.0106,
"step": 1875
},
{
"epoch": 5.97,
"learning_rate": 6.822736842105263e-06,
"loss": 0.0114,
"step": 1900
},
{
"epoch": 6.05,
"learning_rate": 6.8016842105263155e-06,
"loss": 0.0089,
"step": 1925
},
{
"epoch": 6.13,
"learning_rate": 6.780631578947368e-06,
"loss": 0.0055,
"step": 1950
},
{
"epoch": 6.21,
"learning_rate": 6.759578947368421e-06,
"loss": 0.0051,
"step": 1975
},
{
"epoch": 6.29,
"learning_rate": 6.7385263157894735e-06,
"loss": 0.0063,
"step": 2000
},
{
"epoch": 6.29,
"eval_loss": 0.4341892600059509,
"eval_runtime": 1246.3959,
"eval_samples_per_second": 1.711,
"eval_steps_per_second": 0.214,
"eval_wer": 14.092582851130983,
"step": 2000
},
{
"epoch": 6.37,
"learning_rate": 6.717473684210526e-06,
"loss": 0.006,
"step": 2025
},
{
"epoch": 6.45,
"learning_rate": 6.696421052631579e-06,
"loss": 0.005,
"step": 2050
},
{
"epoch": 6.53,
"learning_rate": 6.6753684210526314e-06,
"loss": 0.0047,
"step": 2075
},
{
"epoch": 6.6,
"learning_rate": 6.654315789473684e-06,
"loss": 0.0066,
"step": 2100
},
{
"epoch": 6.68,
"learning_rate": 6.633263157894737e-06,
"loss": 0.0057,
"step": 2125
},
{
"epoch": 6.76,
"learning_rate": 6.612210526315789e-06,
"loss": 0.0063,
"step": 2150
},
{
"epoch": 6.84,
"learning_rate": 6.591157894736841e-06,
"loss": 0.0067,
"step": 2175
},
{
"epoch": 6.92,
"learning_rate": 6.570105263157894e-06,
"loss": 0.006,
"step": 2200
},
{
"epoch": 7.0,
"learning_rate": 6.5490526315789465e-06,
"loss": 0.0066,
"step": 2225
},
{
"epoch": 7.08,
"learning_rate": 6.527999999999999e-06,
"loss": 0.0056,
"step": 2250
},
{
"epoch": 7.15,
"learning_rate": 6.506947368421053e-06,
"loss": 0.0036,
"step": 2275
},
{
"epoch": 7.23,
"learning_rate": 6.485894736842105e-06,
"loss": 0.0034,
"step": 2300
},
{
"epoch": 7.31,
"learning_rate": 6.464842105263158e-06,
"loss": 0.0027,
"step": 2325
},
{
"epoch": 7.39,
"learning_rate": 6.443789473684211e-06,
"loss": 0.0044,
"step": 2350
},
{
"epoch": 7.47,
"learning_rate": 6.422736842105263e-06,
"loss": 0.005,
"step": 2375
},
{
"epoch": 7.55,
"learning_rate": 6.401684210526316e-06,
"loss": 0.0034,
"step": 2400
},
{
"epoch": 7.63,
"learning_rate": 6.380631578947368e-06,
"loss": 0.0028,
"step": 2425
},
{
"epoch": 7.7,
"learning_rate": 6.35957894736842e-06,
"loss": 0.0037,
"step": 2450
},
{
"epoch": 7.78,
"learning_rate": 6.338526315789473e-06,
"loss": 0.0045,
"step": 2475
},
{
"epoch": 7.86,
"learning_rate": 6.317473684210526e-06,
"loss": 0.005,
"step": 2500
},
{
"epoch": 7.94,
"learning_rate": 6.296421052631578e-06,
"loss": 0.0063,
"step": 2525
},
{
"epoch": 8.02,
"learning_rate": 6.275368421052631e-06,
"loss": 0.0051,
"step": 2550
},
{
"epoch": 8.1,
"learning_rate": 6.254315789473684e-06,
"loss": 0.0038,
"step": 2575
},
{
"epoch": 8.18,
"learning_rate": 6.233263157894737e-06,
"loss": 0.0035,
"step": 2600
},
{
"epoch": 8.25,
"learning_rate": 6.21221052631579e-06,
"loss": 0.0036,
"step": 2625
},
{
"epoch": 8.33,
"learning_rate": 6.1911578947368425e-06,
"loss": 0.0034,
"step": 2650
},
{
"epoch": 8.41,
"learning_rate": 6.170105263157894e-06,
"loss": 0.0028,
"step": 2675
},
{
"epoch": 8.49,
"learning_rate": 6.149052631578947e-06,
"loss": 0.0028,
"step": 2700
},
{
"epoch": 8.57,
"learning_rate": 6.128e-06,
"loss": 0.0024,
"step": 2725
},
{
"epoch": 8.65,
"learning_rate": 6.106947368421052e-06,
"loss": 0.0029,
"step": 2750
},
{
"epoch": 8.73,
"learning_rate": 6.085894736842105e-06,
"loss": 0.0032,
"step": 2775
},
{
"epoch": 8.81,
"learning_rate": 6.0648421052631576e-06,
"loss": 0.0031,
"step": 2800
},
{
"epoch": 8.88,
"learning_rate": 6.04378947368421e-06,
"loss": 0.0023,
"step": 2825
},
{
"epoch": 8.96,
"learning_rate": 6.022736842105263e-06,
"loss": 0.0028,
"step": 2850
},
{
"epoch": 9.04,
"learning_rate": 6.0016842105263155e-06,
"loss": 0.0032,
"step": 2875
},
{
"epoch": 9.12,
"learning_rate": 5.980631578947368e-06,
"loss": 0.0037,
"step": 2900
},
{
"epoch": 9.2,
"learning_rate": 5.95957894736842e-06,
"loss": 0.0021,
"step": 2925
},
{
"epoch": 9.28,
"learning_rate": 5.9385263157894735e-06,
"loss": 0.0021,
"step": 2950
},
{
"epoch": 9.36,
"learning_rate": 5.917473684210526e-06,
"loss": 0.0022,
"step": 2975
},
{
"epoch": 9.43,
"learning_rate": 5.896421052631579e-06,
"loss": 0.0016,
"step": 3000
},
{
"epoch": 9.43,
"eval_loss": 0.48472946882247925,
"eval_runtime": 1245.478,
"eval_samples_per_second": 1.713,
"eval_steps_per_second": 0.214,
"eval_wer": 14.360862703840086,
"step": 3000
},
{
"epoch": 9.51,
"learning_rate": 5.8753684210526315e-06,
"loss": 0.002,
"step": 3025
},
{
"epoch": 9.59,
"learning_rate": 5.854315789473684e-06,
"loss": 0.0027,
"step": 3050
},
{
"epoch": 9.67,
"learning_rate": 5.833263157894737e-06,
"loss": 0.0027,
"step": 3075
},
{
"epoch": 9.75,
"learning_rate": 5.8122105263157894e-06,
"loss": 0.0038,
"step": 3100
},
{
"epoch": 9.83,
"learning_rate": 5.791157894736842e-06,
"loss": 0.0027,
"step": 3125
},
{
"epoch": 9.91,
"learning_rate": 5.770105263157895e-06,
"loss": 0.0021,
"step": 3150
},
{
"epoch": 9.98,
"learning_rate": 5.7490526315789465e-06,
"loss": 0.0024,
"step": 3175
},
{
"epoch": 10.06,
"learning_rate": 5.727999999999999e-06,
"loss": 0.0017,
"step": 3200
},
{
"epoch": 10.14,
"learning_rate": 5.706947368421052e-06,
"loss": 0.0024,
"step": 3225
},
{
"epoch": 10.22,
"learning_rate": 5.6858947368421045e-06,
"loss": 0.0023,
"step": 3250
},
{
"epoch": 10.3,
"learning_rate": 5.664842105263157e-06,
"loss": 0.0018,
"step": 3275
},
{
"epoch": 10.38,
"learning_rate": 5.643789473684211e-06,
"loss": 0.0019,
"step": 3300
},
{
"epoch": 10.46,
"learning_rate": 5.622736842105263e-06,
"loss": 0.0027,
"step": 3325
},
{
"epoch": 10.53,
"learning_rate": 5.601684210526316e-06,
"loss": 0.0027,
"step": 3350
},
{
"epoch": 10.61,
"learning_rate": 5.580631578947369e-06,
"loss": 0.0022,
"step": 3375
},
{
"epoch": 10.69,
"learning_rate": 5.559578947368421e-06,
"loss": 0.0019,
"step": 3400
},
{
"epoch": 10.77,
"learning_rate": 5.538526315789473e-06,
"loss": 0.0018,
"step": 3425
},
{
"epoch": 10.85,
"learning_rate": 5.517473684210526e-06,
"loss": 0.002,
"step": 3450
},
{
"epoch": 10.93,
"learning_rate": 5.496421052631578e-06,
"loss": 0.0034,
"step": 3475
},
{
"epoch": 11.01,
"learning_rate": 5.475368421052631e-06,
"loss": 0.0017,
"step": 3500
},
{
"epoch": 11.08,
"learning_rate": 5.454315789473684e-06,
"loss": 0.002,
"step": 3525
},
{
"epoch": 11.16,
"learning_rate": 5.433263157894736e-06,
"loss": 0.0019,
"step": 3550
},
{
"epoch": 11.24,
"learning_rate": 5.412210526315789e-06,
"loss": 0.0022,
"step": 3575
},
{
"epoch": 11.32,
"learning_rate": 5.391157894736842e-06,
"loss": 0.0015,
"step": 3600
},
{
"epoch": 11.4,
"learning_rate": 5.370105263157895e-06,
"loss": 0.0014,
"step": 3625
},
{
"epoch": 11.48,
"learning_rate": 5.349052631578948e-06,
"loss": 0.0017,
"step": 3650
},
{
"epoch": 11.56,
"learning_rate": 5.328e-06,
"loss": 0.0015,
"step": 3675
},
{
"epoch": 11.64,
"learning_rate": 5.306947368421052e-06,
"loss": 0.0016,
"step": 3700
},
{
"epoch": 11.71,
"learning_rate": 5.285894736842105e-06,
"loss": 0.0026,
"step": 3725
},
{
"epoch": 11.79,
"learning_rate": 5.264842105263158e-06,
"loss": 0.0026,
"step": 3750
},
{
"epoch": 11.87,
"learning_rate": 5.24378947368421e-06,
"loss": 0.0026,
"step": 3775
},
{
"epoch": 11.95,
"learning_rate": 5.222736842105263e-06,
"loss": 0.0019,
"step": 3800
},
{
"epoch": 12.03,
"learning_rate": 5.2016842105263156e-06,
"loss": 0.0019,
"step": 3825
},
{
"epoch": 12.11,
"learning_rate": 5.180631578947368e-06,
"loss": 0.0018,
"step": 3850
},
{
"epoch": 12.19,
"learning_rate": 5.159578947368421e-06,
"loss": 0.0021,
"step": 3875
},
{
"epoch": 12.26,
"learning_rate": 5.1385263157894735e-06,
"loss": 0.0015,
"step": 3900
},
{
"epoch": 12.34,
"learning_rate": 5.117473684210525e-06,
"loss": 0.0019,
"step": 3925
},
{
"epoch": 12.42,
"learning_rate": 5.096421052631578e-06,
"loss": 0.0026,
"step": 3950
},
{
"epoch": 12.5,
"learning_rate": 5.0753684210526315e-06,
"loss": 0.0015,
"step": 3975
},
{
"epoch": 12.58,
"learning_rate": 5.054315789473684e-06,
"loss": 0.002,
"step": 4000
},
{
"epoch": 12.58,
"eval_loss": 0.4919339120388031,
"eval_runtime": 1243.533,
"eval_samples_per_second": 1.715,
"eval_steps_per_second": 0.215,
"eval_wer": 14.171488690163073,
"step": 4000
},
{
"epoch": 12.66,
"learning_rate": 5.033263157894737e-06,
"loss": 0.0017,
"step": 4025
},
{
"epoch": 12.74,
"learning_rate": 5.0122105263157895e-06,
"loss": 0.0025,
"step": 4050
},
{
"epoch": 12.81,
"learning_rate": 4.991157894736842e-06,
"loss": 0.0025,
"step": 4075
},
{
"epoch": 12.89,
"learning_rate": 4.970105263157895e-06,
"loss": 0.0016,
"step": 4100
},
{
"epoch": 12.97,
"learning_rate": 4.9490526315789474e-06,
"loss": 0.0027,
"step": 4125
},
{
"epoch": 13.05,
"learning_rate": 4.928e-06,
"loss": 0.0021,
"step": 4150
},
{
"epoch": 13.13,
"learning_rate": 4.906947368421052e-06,
"loss": 0.0012,
"step": 4175
},
{
"epoch": 13.21,
"learning_rate": 4.8858947368421045e-06,
"loss": 0.0018,
"step": 4200
},
{
"epoch": 13.29,
"learning_rate": 4.864842105263157e-06,
"loss": 0.0012,
"step": 4225
},
{
"epoch": 13.36,
"learning_rate": 4.84378947368421e-06,
"loss": 0.0012,
"step": 4250
},
{
"epoch": 13.44,
"learning_rate": 4.8227368421052625e-06,
"loss": 0.0008,
"step": 4275
},
{
"epoch": 13.52,
"learning_rate": 4.801684210526316e-06,
"loss": 0.001,
"step": 4300
},
{
"epoch": 13.6,
"learning_rate": 4.780631578947369e-06,
"loss": 0.001,
"step": 4325
},
{
"epoch": 13.68,
"learning_rate": 4.759578947368421e-06,
"loss": 0.001,
"step": 4350
},
{
"epoch": 13.76,
"learning_rate": 4.738526315789474e-06,
"loss": 0.0011,
"step": 4375
},
{
"epoch": 13.84,
"learning_rate": 4.717473684210527e-06,
"loss": 0.0011,
"step": 4400
},
{
"epoch": 13.92,
"learning_rate": 4.6964210526315784e-06,
"loss": 0.0008,
"step": 4425
},
{
"epoch": 13.99,
"learning_rate": 4.675368421052631e-06,
"loss": 0.0021,
"step": 4450
},
{
"epoch": 14.07,
"learning_rate": 4.654315789473684e-06,
"loss": 0.0018,
"step": 4475
},
{
"epoch": 14.15,
"learning_rate": 4.633263157894736e-06,
"loss": 0.0013,
"step": 4500
},
{
"epoch": 14.23,
"learning_rate": 4.612210526315789e-06,
"loss": 0.0009,
"step": 4525
},
{
"epoch": 14.31,
"learning_rate": 4.591157894736842e-06,
"loss": 0.0012,
"step": 4550
},
{
"epoch": 14.39,
"learning_rate": 4.570105263157894e-06,
"loss": 0.0009,
"step": 4575
},
{
"epoch": 14.47,
"learning_rate": 4.549052631578947e-06,
"loss": 0.002,
"step": 4600
},
{
"epoch": 14.54,
"learning_rate": 4.528e-06,
"loss": 0.0011,
"step": 4625
},
{
"epoch": 14.62,
"learning_rate": 4.506947368421053e-06,
"loss": 0.0006,
"step": 4650
},
{
"epoch": 14.7,
"learning_rate": 4.485894736842105e-06,
"loss": 0.0013,
"step": 4675
},
{
"epoch": 14.78,
"learning_rate": 4.464842105263158e-06,
"loss": 0.0011,
"step": 4700
},
{
"epoch": 14.86,
"learning_rate": 4.44378947368421e-06,
"loss": 0.0013,
"step": 4725
},
{
"epoch": 14.94,
"learning_rate": 4.422736842105263e-06,
"loss": 0.0012,
"step": 4750
},
{
"epoch": 15.02,
"learning_rate": 4.401684210526316e-06,
"loss": 0.0024,
"step": 4775
},
{
"epoch": 15.09,
"learning_rate": 4.380631578947368e-06,
"loss": 0.0013,
"step": 4800
},
{
"epoch": 15.17,
"learning_rate": 4.359578947368421e-06,
"loss": 0.0013,
"step": 4825
},
{
"epoch": 15.25,
"learning_rate": 4.3385263157894736e-06,
"loss": 0.0014,
"step": 4850
},
{
"epoch": 15.33,
"learning_rate": 4.317473684210526e-06,
"loss": 0.0015,
"step": 4875
},
{
"epoch": 15.41,
"learning_rate": 4.296421052631579e-06,
"loss": 0.0006,
"step": 4900
},
{
"epoch": 15.49,
"learning_rate": 4.275368421052631e-06,
"loss": 0.0008,
"step": 4925
},
{
"epoch": 15.57,
"learning_rate": 4.254315789473683e-06,
"loss": 0.0008,
"step": 4950
},
{
"epoch": 15.64,
"learning_rate": 4.233263157894737e-06,
"loss": 0.0007,
"step": 4975
},
{
"epoch": 15.72,
"learning_rate": 4.2122105263157895e-06,
"loss": 0.0013,
"step": 5000
},
{
"epoch": 15.72,
"eval_loss": 0.5114014744758606,
"eval_runtime": 1251.3958,
"eval_samples_per_second": 1.704,
"eval_steps_per_second": 0.213,
"eval_wer": 14.229352972119939,
"step": 5000
},
{
"epoch": 15.8,
"learning_rate": 4.191157894736842e-06,
"loss": 0.0011,
"step": 5025
},
{
"epoch": 15.88,
"learning_rate": 4.170105263157895e-06,
"loss": 0.0011,
"step": 5050
},
{
"epoch": 15.96,
"learning_rate": 4.1490526315789475e-06,
"loss": 0.0007,
"step": 5075
},
{
"epoch": 16.04,
"learning_rate": 4.128e-06,
"loss": 0.0011,
"step": 5100
},
{
"epoch": 16.12,
"learning_rate": 4.106947368421053e-06,
"loss": 0.0012,
"step": 5125
},
{
"epoch": 16.19,
"learning_rate": 4.0858947368421054e-06,
"loss": 0.0005,
"step": 5150
},
{
"epoch": 16.27,
"learning_rate": 4.064842105263157e-06,
"loss": 0.0016,
"step": 5175
},
{
"epoch": 16.35,
"learning_rate": 4.04378947368421e-06,
"loss": 0.0009,
"step": 5200
},
{
"epoch": 16.43,
"learning_rate": 4.0227368421052625e-06,
"loss": 0.0005,
"step": 5225
},
{
"epoch": 16.51,
"learning_rate": 4.001684210526315e-06,
"loss": 0.0006,
"step": 5250
},
{
"epoch": 16.59,
"learning_rate": 3.980631578947369e-06,
"loss": 0.0005,
"step": 5275
},
{
"epoch": 16.67,
"learning_rate": 3.9595789473684205e-06,
"loss": 0.0005,
"step": 5300
},
{
"epoch": 16.75,
"learning_rate": 3.939368421052631e-06,
"loss": 0.0015,
"step": 5325
},
{
"epoch": 16.82,
"learning_rate": 3.918315789473684e-06,
"loss": 0.0006,
"step": 5350
},
{
"epoch": 16.9,
"learning_rate": 3.897263157894737e-06,
"loss": 0.0005,
"step": 5375
},
{
"epoch": 16.98,
"learning_rate": 3.87621052631579e-06,
"loss": 0.0014,
"step": 5400
},
{
"epoch": 17.06,
"learning_rate": 3.855157894736842e-06,
"loss": 0.0012,
"step": 5425
},
{
"epoch": 17.14,
"learning_rate": 3.834105263157894e-06,
"loss": 0.0024,
"step": 5450
},
{
"epoch": 17.22,
"learning_rate": 3.813052631578947e-06,
"loss": 0.0007,
"step": 5475
},
{
"epoch": 17.3,
"learning_rate": 3.7919999999999994e-06,
"loss": 0.001,
"step": 5500
},
{
"epoch": 17.37,
"learning_rate": 3.7709473684210525e-06,
"loss": 0.0012,
"step": 5525
},
{
"epoch": 17.45,
"learning_rate": 3.749894736842105e-06,
"loss": 0.0008,
"step": 5550
},
{
"epoch": 17.53,
"learning_rate": 3.728842105263158e-06,
"loss": 0.0004,
"step": 5575
},
{
"epoch": 17.61,
"learning_rate": 3.7077894736842105e-06,
"loss": 0.0005,
"step": 5600
},
{
"epoch": 17.69,
"learning_rate": 3.6867368421052627e-06,
"loss": 0.0005,
"step": 5625
},
{
"epoch": 17.77,
"learning_rate": 3.6656842105263154e-06,
"loss": 0.0007,
"step": 5650
},
{
"epoch": 17.85,
"learning_rate": 3.644631578947368e-06,
"loss": 0.0011,
"step": 5675
},
{
"epoch": 17.92,
"learning_rate": 3.623578947368421e-06,
"loss": 0.0005,
"step": 5700
},
{
"epoch": 18.0,
"learning_rate": 3.6025263157894738e-06,
"loss": 0.0004,
"step": 5725
},
{
"epoch": 18.08,
"learning_rate": 3.5814736842105264e-06,
"loss": 0.0007,
"step": 5750
},
{
"epoch": 18.16,
"learning_rate": 3.5604210526315786e-06,
"loss": 0.0003,
"step": 5775
},
{
"epoch": 18.24,
"learning_rate": 3.5393684210526313e-06,
"loss": 0.0003,
"step": 5800
},
{
"epoch": 18.32,
"learning_rate": 3.518315789473684e-06,
"loss": 0.0003,
"step": 5825
},
{
"epoch": 18.4,
"learning_rate": 3.4972631578947366e-06,
"loss": 0.001,
"step": 5850
},
{
"epoch": 18.47,
"learning_rate": 3.4762105263157897e-06,
"loss": 0.0003,
"step": 5875
},
{
"epoch": 18.55,
"learning_rate": 3.455157894736842e-06,
"loss": 0.0004,
"step": 5900
},
{
"epoch": 18.63,
"learning_rate": 3.4341052631578946e-06,
"loss": 0.0003,
"step": 5925
},
{
"epoch": 18.71,
"learning_rate": 3.4130526315789472e-06,
"loss": 0.0004,
"step": 5950
},
{
"epoch": 18.79,
"learning_rate": 3.392e-06,
"loss": 0.0004,
"step": 5975
},
{
"epoch": 18.87,
"learning_rate": 3.370947368421052e-06,
"loss": 0.0014,
"step": 6000
},
{
"epoch": 18.87,
"eval_loss": 0.5197107791900635,
"eval_runtime": 1246.3616,
"eval_samples_per_second": 1.711,
"eval_steps_per_second": 0.214,
"eval_wer": 13.913729615991583,
"step": 6000
},
{
"epoch": 18.95,
"learning_rate": 3.349894736842105e-06,
"loss": 0.0003,
"step": 6025
},
{
"epoch": 19.03,
"learning_rate": 3.328842105263158e-06,
"loss": 0.0003,
"step": 6050
},
{
"epoch": 19.1,
"learning_rate": 3.3077894736842105e-06,
"loss": 0.0004,
"step": 6075
},
{
"epoch": 19.18,
"learning_rate": 3.286736842105263e-06,
"loss": 0.0002,
"step": 6100
},
{
"epoch": 19.26,
"learning_rate": 3.2656842105263154e-06,
"loss": 0.0002,
"step": 6125
},
{
"epoch": 19.34,
"learning_rate": 3.244631578947368e-06,
"loss": 0.0004,
"step": 6150
},
{
"epoch": 19.42,
"learning_rate": 3.2235789473684207e-06,
"loss": 0.0005,
"step": 6175
},
{
"epoch": 19.5,
"learning_rate": 3.2025263157894738e-06,
"loss": 0.0003,
"step": 6200
},
{
"epoch": 19.58,
"learning_rate": 3.1814736842105264e-06,
"loss": 0.0007,
"step": 6225
},
{
"epoch": 19.65,
"learning_rate": 3.160421052631579e-06,
"loss": 0.0005,
"step": 6250
},
{
"epoch": 19.73,
"learning_rate": 3.1393684210526313e-06,
"loss": 0.0007,
"step": 6275
},
{
"epoch": 19.81,
"learning_rate": 3.118315789473684e-06,
"loss": 0.0002,
"step": 6300
},
{
"epoch": 19.89,
"learning_rate": 3.0972631578947366e-06,
"loss": 0.0008,
"step": 6325
},
{
"epoch": 19.97,
"learning_rate": 3.0762105263157893e-06,
"loss": 0.0013,
"step": 6350
},
{
"epoch": 20.05,
"learning_rate": 3.0551578947368424e-06,
"loss": 0.0004,
"step": 6375
},
{
"epoch": 20.13,
"learning_rate": 3.0341052631578946e-06,
"loss": 0.0002,
"step": 6400
},
{
"epoch": 20.2,
"learning_rate": 3.0130526315789472e-06,
"loss": 0.0004,
"step": 6425
},
{
"epoch": 20.28,
"learning_rate": 2.992e-06,
"loss": 0.0003,
"step": 6450
},
{
"epoch": 20.36,
"learning_rate": 2.9709473684210526e-06,
"loss": 0.0008,
"step": 6475
},
{
"epoch": 20.44,
"learning_rate": 2.9498947368421048e-06,
"loss": 0.0006,
"step": 6500
},
{
"epoch": 20.52,
"learning_rate": 2.9288421052631574e-06,
"loss": 0.0008,
"step": 6525
},
{
"epoch": 20.6,
"learning_rate": 2.9077894736842105e-06,
"loss": 0.0007,
"step": 6550
},
{
"epoch": 20.68,
"learning_rate": 2.886736842105263e-06,
"loss": 0.0007,
"step": 6575
},
{
"epoch": 20.75,
"learning_rate": 2.865684210526316e-06,
"loss": 0.0003,
"step": 6600
},
{
"epoch": 20.83,
"learning_rate": 2.8446315789473685e-06,
"loss": 0.0004,
"step": 6625
},
{
"epoch": 20.91,
"learning_rate": 2.8235789473684207e-06,
"loss": 0.0006,
"step": 6650
},
{
"epoch": 20.99,
"learning_rate": 2.8025263157894734e-06,
"loss": 0.0006,
"step": 6675
},
{
"epoch": 21.07,
"learning_rate": 2.781473684210526e-06,
"loss": 0.0008,
"step": 6700
},
{
"epoch": 21.15,
"learning_rate": 2.760421052631579e-06,
"loss": 0.0003,
"step": 6725
},
{
"epoch": 21.23,
"learning_rate": 2.7393684210526318e-06,
"loss": 0.0002,
"step": 6750
},
{
"epoch": 21.31,
"learning_rate": 2.718315789473684e-06,
"loss": 0.0002,
"step": 6775
},
{
"epoch": 21.38,
"learning_rate": 2.6972631578947366e-06,
"loss": 0.0003,
"step": 6800
},
{
"epoch": 21.46,
"learning_rate": 2.6762105263157893e-06,
"loss": 0.0005,
"step": 6825
},
{
"epoch": 21.54,
"learning_rate": 2.655157894736842e-06,
"loss": 0.0003,
"step": 6850
},
{
"epoch": 21.62,
"learning_rate": 2.634105263157895e-06,
"loss": 0.0004,
"step": 6875
},
{
"epoch": 21.7,
"learning_rate": 2.6130526315789473e-06,
"loss": 0.0003,
"step": 6900
},
{
"epoch": 21.78,
"learning_rate": 2.592e-06,
"loss": 0.0003,
"step": 6925
},
{
"epoch": 21.86,
"learning_rate": 2.5709473684210526e-06,
"loss": 0.0008,
"step": 6950
},
{
"epoch": 21.93,
"learning_rate": 2.5498947368421052e-06,
"loss": 0.0006,
"step": 6975
},
{
"epoch": 22.01,
"learning_rate": 2.5288421052631575e-06,
"loss": 0.0003,
"step": 7000
},
{
"epoch": 22.01,
"eval_loss": 0.5421546101570129,
"eval_runtime": 1231.4206,
"eval_samples_per_second": 1.732,
"eval_steps_per_second": 0.217,
"eval_wer": 14.1977906365071,
"step": 7000
},
{
"epoch": 22.09,
"learning_rate": 2.50778947368421e-06,
"loss": 0.0005,
"step": 7025
},
{
"epoch": 22.17,
"learning_rate": 2.486736842105263e-06,
"loss": 0.0002,
"step": 7050
},
{
"epoch": 22.25,
"learning_rate": 2.465684210526316e-06,
"loss": 0.0003,
"step": 7075
},
{
"epoch": 22.33,
"learning_rate": 2.4446315789473685e-06,
"loss": 0.0002,
"step": 7100
},
{
"epoch": 22.41,
"learning_rate": 2.423578947368421e-06,
"loss": 0.0003,
"step": 7125
},
{
"epoch": 22.48,
"learning_rate": 2.4025263157894734e-06,
"loss": 0.0002,
"step": 7150
},
{
"epoch": 22.56,
"learning_rate": 2.381473684210526e-06,
"loss": 0.0004,
"step": 7175
},
{
"epoch": 22.64,
"learning_rate": 2.3604210526315787e-06,
"loss": 0.0002,
"step": 7200
},
{
"epoch": 22.72,
"learning_rate": 2.3393684210526318e-06,
"loss": 0.0004,
"step": 7225
},
{
"epoch": 22.8,
"learning_rate": 2.3183157894736844e-06,
"loss": 0.0002,
"step": 7250
},
{
"epoch": 22.88,
"learning_rate": 2.2972631578947367e-06,
"loss": 0.0002,
"step": 7275
},
{
"epoch": 22.96,
"learning_rate": 2.2762105263157893e-06,
"loss": 0.0002,
"step": 7300
},
{
"epoch": 23.03,
"learning_rate": 2.255157894736842e-06,
"loss": 0.0003,
"step": 7325
},
{
"epoch": 23.11,
"learning_rate": 2.2341052631578946e-06,
"loss": 0.0002,
"step": 7350
},
{
"epoch": 23.19,
"learning_rate": 2.213052631578947e-06,
"loss": 0.0002,
"step": 7375
},
{
"epoch": 23.27,
"learning_rate": 2.192e-06,
"loss": 0.0001,
"step": 7400
},
{
"epoch": 23.35,
"learning_rate": 2.1709473684210526e-06,
"loss": 0.0001,
"step": 7425
},
{
"epoch": 23.43,
"learning_rate": 2.1498947368421052e-06,
"loss": 0.0001,
"step": 7450
},
{
"epoch": 23.51,
"learning_rate": 2.128842105263158e-06,
"loss": 0.0001,
"step": 7475
},
{
"epoch": 23.58,
"learning_rate": 2.10778947368421e-06,
"loss": 0.0001,
"step": 7500
},
{
"epoch": 23.66,
"learning_rate": 2.0867368421052628e-06,
"loss": 0.0001,
"step": 7525
},
{
"epoch": 23.74,
"learning_rate": 2.065684210526316e-06,
"loss": 0.0001,
"step": 7550
},
{
"epoch": 23.82,
"learning_rate": 2.0446315789473685e-06,
"loss": 0.0001,
"step": 7575
},
{
"epoch": 23.9,
"learning_rate": 2.023578947368421e-06,
"loss": 0.0001,
"step": 7600
},
{
"epoch": 23.98,
"learning_rate": 2.002526315789474e-06,
"loss": 0.0001,
"step": 7625
},
{
"epoch": 24.06,
"learning_rate": 1.981473684210526e-06,
"loss": 0.0001,
"step": 7650
},
{
"epoch": 24.14,
"learning_rate": 1.9604210526315787e-06,
"loss": 0.0001,
"step": 7675
},
{
"epoch": 24.21,
"learning_rate": 1.9393684210526314e-06,
"loss": 0.0001,
"step": 7700
},
{
"epoch": 24.29,
"learning_rate": 1.918315789473684e-06,
"loss": 0.0001,
"step": 7725
},
{
"epoch": 24.37,
"learning_rate": 1.8972631578947367e-06,
"loss": 0.0001,
"step": 7750
},
{
"epoch": 24.45,
"learning_rate": 1.8762105263157895e-06,
"loss": 0.0001,
"step": 7775
},
{
"epoch": 24.53,
"learning_rate": 1.855157894736842e-06,
"loss": 0.0001,
"step": 7800
},
{
"epoch": 24.61,
"learning_rate": 1.8341052631578946e-06,
"loss": 0.0001,
"step": 7825
},
{
"epoch": 24.69,
"learning_rate": 1.8130526315789473e-06,
"loss": 0.0001,
"step": 7850
},
{
"epoch": 24.76,
"learning_rate": 1.792e-06,
"loss": 0.0001,
"step": 7875
},
{
"epoch": 24.84,
"learning_rate": 1.7709473684210526e-06,
"loss": 0.0001,
"step": 7900
},
{
"epoch": 24.92,
"learning_rate": 1.749894736842105e-06,
"loss": 0.0001,
"step": 7925
},
{
"epoch": 25.0,
"learning_rate": 1.728842105263158e-06,
"loss": 0.0001,
"step": 7950
},
{
"epoch": 25.08,
"learning_rate": 1.7077894736842104e-06,
"loss": 0.0001,
"step": 7975
},
{
"epoch": 25.16,
"learning_rate": 1.686736842105263e-06,
"loss": 0.0001,
"step": 8000
},
{
"epoch": 25.16,
"eval_loss": 0.5658935904502869,
"eval_runtime": 1242.8998,
"eval_samples_per_second": 1.716,
"eval_steps_per_second": 0.215,
"eval_wer": 13.871646501841136,
"step": 8000
},
{
"epoch": 25.24,
"learning_rate": 1.6656842105263159e-06,
"loss": 0.0001,
"step": 8025
},
{
"epoch": 25.31,
"learning_rate": 1.6446315789473683e-06,
"loss": 0.0001,
"step": 8050
},
{
"epoch": 25.39,
"learning_rate": 1.623578947368421e-06,
"loss": 0.0001,
"step": 8075
},
{
"epoch": 25.47,
"learning_rate": 1.6025263157894734e-06,
"loss": 0.0001,
"step": 8100
},
{
"epoch": 25.55,
"learning_rate": 1.5814736842105263e-06,
"loss": 0.0001,
"step": 8125
},
{
"epoch": 25.63,
"learning_rate": 1.560421052631579e-06,
"loss": 0.0001,
"step": 8150
},
{
"epoch": 25.71,
"learning_rate": 1.5393684210526314e-06,
"loss": 0.0001,
"step": 8175
},
{
"epoch": 25.79,
"learning_rate": 1.5183157894736843e-06,
"loss": 0.0001,
"step": 8200
},
{
"epoch": 25.86,
"learning_rate": 1.4972631578947367e-06,
"loss": 0.0001,
"step": 8225
},
{
"epoch": 25.94,
"learning_rate": 1.4762105263157894e-06,
"loss": 0.0001,
"step": 8250
},
{
"epoch": 26.02,
"learning_rate": 1.4551578947368422e-06,
"loss": 0.0001,
"step": 8275
},
{
"epoch": 26.1,
"learning_rate": 1.4341052631578947e-06,
"loss": 0.0001,
"step": 8300
},
{
"epoch": 26.18,
"learning_rate": 1.4130526315789473e-06,
"loss": 0.0001,
"step": 8325
},
{
"epoch": 26.26,
"learning_rate": 1.3919999999999998e-06,
"loss": 0.0001,
"step": 8350
},
{
"epoch": 26.34,
"learning_rate": 1.3709473684210526e-06,
"loss": 0.0001,
"step": 8375
},
{
"epoch": 26.42,
"learning_rate": 1.3498947368421053e-06,
"loss": 0.0001,
"step": 8400
},
{
"epoch": 26.49,
"learning_rate": 1.3288421052631577e-06,
"loss": 0.0001,
"step": 8425
},
{
"epoch": 26.57,
"learning_rate": 1.3077894736842106e-06,
"loss": 0.0001,
"step": 8450
},
{
"epoch": 26.65,
"learning_rate": 1.286736842105263e-06,
"loss": 0.0001,
"step": 8475
},
{
"epoch": 26.73,
"learning_rate": 1.2656842105263157e-06,
"loss": 0.0001,
"step": 8500
},
{
"epoch": 26.81,
"learning_rate": 1.2446315789473683e-06,
"loss": 0.0001,
"step": 8525
},
{
"epoch": 26.89,
"learning_rate": 1.223578947368421e-06,
"loss": 0.0001,
"step": 8550
},
{
"epoch": 26.97,
"learning_rate": 1.2025263157894737e-06,
"loss": 0.0001,
"step": 8575
},
{
"epoch": 27.04,
"learning_rate": 1.181473684210526e-06,
"loss": 0.0001,
"step": 8600
},
{
"epoch": 27.12,
"learning_rate": 1.160421052631579e-06,
"loss": 0.0001,
"step": 8625
},
{
"epoch": 27.2,
"learning_rate": 1.1393684210526316e-06,
"loss": 0.0001,
"step": 8650
},
{
"epoch": 27.28,
"learning_rate": 1.118315789473684e-06,
"loss": 0.0001,
"step": 8675
},
{
"epoch": 27.36,
"learning_rate": 1.097263157894737e-06,
"loss": 0.0001,
"step": 8700
},
{
"epoch": 27.44,
"learning_rate": 1.0762105263157894e-06,
"loss": 0.0001,
"step": 8725
},
{
"epoch": 27.52,
"learning_rate": 1.055157894736842e-06,
"loss": 0.0001,
"step": 8750
},
{
"epoch": 27.59,
"learning_rate": 1.0341052631578947e-06,
"loss": 0.0001,
"step": 8775
},
{
"epoch": 27.67,
"learning_rate": 1.0130526315789473e-06,
"loss": 0.0001,
"step": 8800
},
{
"epoch": 27.75,
"learning_rate": 9.92e-07,
"loss": 0.0001,
"step": 8825
},
{
"epoch": 27.83,
"learning_rate": 9.709473684210526e-07,
"loss": 0.0001,
"step": 8850
},
{
"epoch": 27.91,
"learning_rate": 9.498947368421052e-07,
"loss": 0.0001,
"step": 8875
},
{
"epoch": 27.99,
"learning_rate": 9.288421052631578e-07,
"loss": 0.0001,
"step": 8900
},
{
"epoch": 28.07,
"learning_rate": 9.077894736842104e-07,
"loss": 0.0001,
"step": 8925
},
{
"epoch": 28.14,
"learning_rate": 8.867368421052632e-07,
"loss": 0.0001,
"step": 8950
},
{
"epoch": 28.22,
"learning_rate": 8.656842105263158e-07,
"loss": 0.0001,
"step": 8975
},
{
"epoch": 28.3,
"learning_rate": 8.446315789473684e-07,
"loss": 0.0001,
"step": 9000
},
{
"epoch": 28.3,
"eval_loss": 0.5772180557250977,
"eval_runtime": 1231.7677,
"eval_samples_per_second": 1.732,
"eval_steps_per_second": 0.217,
"eval_wer": 13.729615991583376,
"step": 9000
},
{
"epoch": 28.38,
"learning_rate": 8.23578947368421e-07,
"loss": 0.0001,
"step": 9025
},
{
"epoch": 28.46,
"learning_rate": 8.025263157894736e-07,
"loss": 0.0001,
"step": 9050
},
{
"epoch": 28.54,
"learning_rate": 7.814736842105263e-07,
"loss": 0.0001,
"step": 9075
},
{
"epoch": 28.62,
"learning_rate": 7.60421052631579e-07,
"loss": 0.0001,
"step": 9100
},
{
"epoch": 28.69,
"learning_rate": 7.393684210526315e-07,
"loss": 0.0001,
"step": 9125
},
{
"epoch": 28.77,
"learning_rate": 7.183157894736842e-07,
"loss": 0.0001,
"step": 9150
},
{
"epoch": 28.85,
"learning_rate": 6.972631578947367e-07,
"loss": 0.0001,
"step": 9175
},
{
"epoch": 28.93,
"learning_rate": 6.762105263157895e-07,
"loss": 0.0001,
"step": 9200
},
{
"epoch": 29.01,
"learning_rate": 6.55157894736842e-07,
"loss": 0.0001,
"step": 9225
},
{
"epoch": 29.09,
"learning_rate": 6.341052631578947e-07,
"loss": 0.0001,
"step": 9250
},
{
"epoch": 29.17,
"learning_rate": 6.130526315789474e-07,
"loss": 0.0001,
"step": 9275
},
{
"epoch": 29.25,
"learning_rate": 5.919999999999999e-07,
"loss": 0.0001,
"step": 9300
},
{
"epoch": 29.32,
"learning_rate": 5.709473684210527e-07,
"loss": 0.0001,
"step": 9325
},
{
"epoch": 29.4,
"learning_rate": 5.498947368421052e-07,
"loss": 0.0001,
"step": 9350
},
{
"epoch": 29.48,
"learning_rate": 5.288421052631579e-07,
"loss": 0.0001,
"step": 9375
},
{
"epoch": 29.56,
"learning_rate": 5.077894736842105e-07,
"loss": 0.0001,
"step": 9400
},
{
"epoch": 29.64,
"learning_rate": 4.867368421052631e-07,
"loss": 0.0001,
"step": 9425
},
{
"epoch": 29.72,
"learning_rate": 4.656842105263158e-07,
"loss": 0.0001,
"step": 9450
},
{
"epoch": 29.8,
"learning_rate": 4.4463157894736843e-07,
"loss": 0.0001,
"step": 9475
},
{
"epoch": 29.87,
"learning_rate": 4.2357894736842103e-07,
"loss": 0.0001,
"step": 9500
},
{
"epoch": 29.95,
"learning_rate": 4.0252631578947364e-07,
"loss": 0.0001,
"step": 9525
},
{
"epoch": 30.03,
"learning_rate": 3.814736842105263e-07,
"loss": 0.0001,
"step": 9550
},
{
"epoch": 30.11,
"learning_rate": 3.604210526315789e-07,
"loss": 0.0001,
"step": 9575
},
{
"epoch": 30.19,
"learning_rate": 3.393684210526316e-07,
"loss": 0.0001,
"step": 9600
},
{
"epoch": 30.27,
"learning_rate": 3.183157894736842e-07,
"loss": 0.0001,
"step": 9625
},
{
"epoch": 30.35,
"learning_rate": 2.972631578947368e-07,
"loss": 0.0001,
"step": 9650
},
{
"epoch": 30.42,
"learning_rate": 2.7621052631578946e-07,
"loss": 0.0001,
"step": 9675
},
{
"epoch": 30.5,
"learning_rate": 2.5515789473684206e-07,
"loss": 0.0001,
"step": 9700
},
{
"epoch": 30.58,
"learning_rate": 2.3410526315789472e-07,
"loss": 0.0001,
"step": 9725
},
{
"epoch": 30.66,
"learning_rate": 2.1305263157894734e-07,
"loss": 0.0001,
"step": 9750
},
{
"epoch": 30.74,
"learning_rate": 1.92e-07,
"loss": 0.0001,
"step": 9775
},
{
"epoch": 30.82,
"learning_rate": 1.7094736842105263e-07,
"loss": 0.0001,
"step": 9800
},
{
"epoch": 30.9,
"learning_rate": 1.4989473684210526e-07,
"loss": 0.0001,
"step": 9825
},
{
"epoch": 30.97,
"learning_rate": 1.2884210526315788e-07,
"loss": 0.0001,
"step": 9850
},
{
"epoch": 31.05,
"learning_rate": 1.0778947368421053e-07,
"loss": 0.0001,
"step": 9875
},
{
"epoch": 31.13,
"learning_rate": 8.673684210526315e-08,
"loss": 0.0001,
"step": 9900
},
{
"epoch": 31.21,
"learning_rate": 6.568421052631578e-08,
"loss": 0.0001,
"step": 9925
},
{
"epoch": 31.29,
"learning_rate": 4.463157894736842e-08,
"loss": 0.0001,
"step": 9950
},
{
"epoch": 31.37,
"learning_rate": 2.3578947368421052e-08,
"loss": 0.0001,
"step": 9975
},
{
"epoch": 31.45,
"learning_rate": 2.526315789473684e-09,
"loss": 0.0001,
"step": 10000
},
{
"epoch": 31.45,
"eval_loss": 0.5813759565353394,
"eval_runtime": 1234.2331,
"eval_samples_per_second": 1.728,
"eval_steps_per_second": 0.216,
"eval_wer": 13.708574434508153,
"step": 10000
},
{
"epoch": 31.45,
"step": 10000,
"total_flos": 3.263088216372019e+20,
"train_loss": 0.0230206538159051,
"train_runtime": 56008.0991,
"train_samples_per_second": 5.713,
"train_steps_per_second": 0.179
}
],
"max_steps": 10000,
"num_train_epochs": 32,
"total_flos": 3.263088216372019e+20,
"trial_name": null,
"trial_params": null
}