whisper-medium-eu / trainer_state.json
xezpeleta's picture
End of training
34f406d
raw
history blame
40.5 kB
{
"best_metric": 14.119648426424725,
"best_model_checkpoint": "./checkpoint-8000",
"epoch": 2.176,
"global_step": 8000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2.76e-07,
"loss": 1.4248,
"step": 25
},
{
"epoch": 0.01,
"learning_rate": 5.64e-07,
"loss": 1.3686,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 8.64e-07,
"loss": 1.1529,
"step": 75
},
{
"epoch": 0.01,
"learning_rate": 1.164e-06,
"loss": 0.8748,
"step": 100
},
{
"epoch": 0.02,
"learning_rate": 1.464e-06,
"loss": 0.7645,
"step": 125
},
{
"epoch": 0.02,
"learning_rate": 1.764e-06,
"loss": 0.71,
"step": 150
},
{
"epoch": 0.02,
"learning_rate": 2.064e-06,
"loss": 0.7174,
"step": 175
},
{
"epoch": 0.03,
"learning_rate": 2.364e-06,
"loss": 0.6369,
"step": 200
},
{
"epoch": 0.03,
"learning_rate": 2.6640000000000002e-06,
"loss": 0.5867,
"step": 225
},
{
"epoch": 0.03,
"learning_rate": 2.964e-06,
"loss": 0.6348,
"step": 250
},
{
"epoch": 0.03,
"learning_rate": 3.2640000000000004e-06,
"loss": 0.6222,
"step": 275
},
{
"epoch": 0.04,
"learning_rate": 3.564e-06,
"loss": 0.5447,
"step": 300
},
{
"epoch": 0.04,
"learning_rate": 3.864000000000001e-06,
"loss": 0.5093,
"step": 325
},
{
"epoch": 0.04,
"learning_rate": 4.1639999999999994e-06,
"loss": 0.5134,
"step": 350
},
{
"epoch": 0.05,
"learning_rate": 4.464e-06,
"loss": 0.4913,
"step": 375
},
{
"epoch": 0.05,
"learning_rate": 4.7640000000000005e-06,
"loss": 0.5007,
"step": 400
},
{
"epoch": 0.05,
"learning_rate": 5.064e-06,
"loss": 0.4233,
"step": 425
},
{
"epoch": 0.06,
"learning_rate": 5.364000000000001e-06,
"loss": 0.3944,
"step": 450
},
{
"epoch": 0.06,
"learning_rate": 5.6639999999999995e-06,
"loss": 0.4615,
"step": 475
},
{
"epoch": 0.06,
"learning_rate": 5.964e-06,
"loss": 0.443,
"step": 500
},
{
"epoch": 0.06,
"eval_loss": 0.5036891102790833,
"eval_runtime": 1812.9009,
"eval_samples_per_second": 3.636,
"eval_steps_per_second": 0.455,
"eval_wer": 37.42962452914254,
"step": 500
},
{
"epoch": 0.07,
"learning_rate": 5.9824e-06,
"loss": 0.458,
"step": 525
},
{
"epoch": 0.07,
"learning_rate": 5.962400000000001e-06,
"loss": 0.4264,
"step": 550
},
{
"epoch": 0.07,
"learning_rate": 5.9424e-06,
"loss": 0.4405,
"step": 575
},
{
"epoch": 0.07,
"learning_rate": 5.9224e-06,
"loss": 0.4451,
"step": 600
},
{
"epoch": 0.08,
"learning_rate": 5.9024000000000004e-06,
"loss": 0.416,
"step": 625
},
{
"epoch": 0.08,
"learning_rate": 5.882400000000001e-06,
"loss": 0.4193,
"step": 650
},
{
"epoch": 0.08,
"learning_rate": 5.8624e-06,
"loss": 0.4029,
"step": 675
},
{
"epoch": 0.09,
"learning_rate": 5.8424e-06,
"loss": 0.4028,
"step": 700
},
{
"epoch": 0.09,
"learning_rate": 5.8224e-06,
"loss": 0.3613,
"step": 725
},
{
"epoch": 0.09,
"learning_rate": 5.8024e-06,
"loss": 0.3995,
"step": 750
},
{
"epoch": 0.1,
"learning_rate": 5.7824e-06,
"loss": 0.3865,
"step": 775
},
{
"epoch": 0.1,
"learning_rate": 5.7624e-06,
"loss": 0.3534,
"step": 800
},
{
"epoch": 0.1,
"learning_rate": 5.7424e-06,
"loss": 0.3741,
"step": 825
},
{
"epoch": 0.11,
"learning_rate": 5.7224000000000005e-06,
"loss": 0.4243,
"step": 850
},
{
"epoch": 0.11,
"learning_rate": 5.702400000000001e-06,
"loss": 0.387,
"step": 875
},
{
"epoch": 0.11,
"learning_rate": 5.6824e-06,
"loss": 0.3726,
"step": 900
},
{
"epoch": 0.12,
"learning_rate": 5.6624e-06,
"loss": 0.4141,
"step": 925
},
{
"epoch": 0.12,
"learning_rate": 5.6424e-06,
"loss": 0.3553,
"step": 950
},
{
"epoch": 0.12,
"learning_rate": 5.622400000000001e-06,
"loss": 0.4188,
"step": 975
},
{
"epoch": 0.12,
"learning_rate": 5.6024e-06,
"loss": 0.4196,
"step": 1000
},
{
"epoch": 0.12,
"eval_loss": 0.40096473693847656,
"eval_runtime": 1827.7488,
"eval_samples_per_second": 3.606,
"eval_steps_per_second": 0.451,
"eval_wer": 28.913686257037547,
"step": 1000
},
{
"epoch": 0.13,
"learning_rate": 5.5824e-06,
"loss": 0.4134,
"step": 1025
},
{
"epoch": 0.13,
"learning_rate": 5.5624e-06,
"loss": 0.3777,
"step": 1050
},
{
"epoch": 0.13,
"learning_rate": 5.5424e-06,
"loss": 0.3565,
"step": 1075
},
{
"epoch": 0.14,
"learning_rate": 5.5224e-06,
"loss": 0.3878,
"step": 1100
},
{
"epoch": 0.14,
"learning_rate": 5.5024e-06,
"loss": 0.3691,
"step": 1125
},
{
"epoch": 0.14,
"learning_rate": 5.4824e-06,
"loss": 0.3647,
"step": 1150
},
{
"epoch": 0.15,
"learning_rate": 5.4624e-06,
"loss": 0.3352,
"step": 1175
},
{
"epoch": 0.15,
"learning_rate": 5.442400000000001e-06,
"loss": 0.3047,
"step": 1200
},
{
"epoch": 0.15,
"learning_rate": 5.422400000000001e-06,
"loss": 0.2753,
"step": 1225
},
{
"epoch": 0.16,
"learning_rate": 5.4024e-06,
"loss": 0.3258,
"step": 1250
},
{
"epoch": 0.16,
"learning_rate": 5.3824e-06,
"loss": 0.3405,
"step": 1275
},
{
"epoch": 0.16,
"learning_rate": 5.3624000000000005e-06,
"loss": 0.3316,
"step": 1300
},
{
"epoch": 0.17,
"learning_rate": 5.3424e-06,
"loss": 0.3187,
"step": 1325
},
{
"epoch": 0.17,
"learning_rate": 5.3224e-06,
"loss": 0.2521,
"step": 1350
},
{
"epoch": 0.17,
"learning_rate": 5.3024e-06,
"loss": 0.3445,
"step": 1375
},
{
"epoch": 0.17,
"learning_rate": 5.2824e-06,
"loss": 0.2715,
"step": 1400
},
{
"epoch": 0.18,
"learning_rate": 5.2624e-06,
"loss": 0.2951,
"step": 1425
},
{
"epoch": 0.18,
"learning_rate": 5.2424e-06,
"loss": 0.3858,
"step": 1450
},
{
"epoch": 0.18,
"learning_rate": 5.2224e-06,
"loss": 0.3253,
"step": 1475
},
{
"epoch": 0.19,
"learning_rate": 5.2024e-06,
"loss": 0.2823,
"step": 1500
},
{
"epoch": 0.19,
"eval_loss": 0.3452778458595276,
"eval_runtime": 1811.7894,
"eval_samples_per_second": 3.638,
"eval_steps_per_second": 0.455,
"eval_wer": 24.685082425371625,
"step": 1500
},
{
"epoch": 0.19,
"learning_rate": 5.1824000000000006e-06,
"loss": 0.2764,
"step": 1525
},
{
"epoch": 0.19,
"learning_rate": 5.162400000000001e-06,
"loss": 0.2853,
"step": 1550
},
{
"epoch": 0.2,
"learning_rate": 5.1424e-06,
"loss": 0.353,
"step": 1575
},
{
"epoch": 0.2,
"learning_rate": 5.1224e-06,
"loss": 0.3318,
"step": 1600
},
{
"epoch": 0.2,
"learning_rate": 5.1024000000000005e-06,
"loss": 0.3729,
"step": 1625
},
{
"epoch": 0.21,
"learning_rate": 5.0824e-06,
"loss": 0.3314,
"step": 1650
},
{
"epoch": 0.21,
"learning_rate": 5.0624e-06,
"loss": 0.3193,
"step": 1675
},
{
"epoch": 0.21,
"learning_rate": 5.0424e-06,
"loss": 0.2881,
"step": 1700
},
{
"epoch": 0.22,
"learning_rate": 5.0223999999999996e-06,
"loss": 0.2961,
"step": 1725
},
{
"epoch": 0.22,
"learning_rate": 5.0024e-06,
"loss": 0.3286,
"step": 1750
},
{
"epoch": 0.22,
"learning_rate": 4.9824e-06,
"loss": 0.3192,
"step": 1775
},
{
"epoch": 0.23,
"learning_rate": 4.9624e-06,
"loss": 0.3183,
"step": 1800
},
{
"epoch": 0.23,
"learning_rate": 4.9424e-06,
"loss": 0.2706,
"step": 1825
},
{
"epoch": 0.23,
"learning_rate": 4.9224000000000005e-06,
"loss": 0.3048,
"step": 1850
},
{
"epoch": 0.23,
"learning_rate": 4.902400000000001e-06,
"loss": 0.2524,
"step": 1875
},
{
"epoch": 0.24,
"learning_rate": 4.8824e-06,
"loss": 0.2882,
"step": 1900
},
{
"epoch": 0.24,
"learning_rate": 4.8624e-06,
"loss": 0.2466,
"step": 1925
},
{
"epoch": 0.24,
"learning_rate": 4.8424000000000004e-06,
"loss": 0.1995,
"step": 1950
},
{
"epoch": 0.25,
"learning_rate": 4.8224e-06,
"loss": 0.2497,
"step": 1975
},
{
"epoch": 0.25,
"learning_rate": 4.8024e-06,
"loss": 0.2551,
"step": 2000
},
{
"epoch": 0.25,
"eval_loss": 0.3163716495037079,
"eval_runtime": 1819.6056,
"eval_samples_per_second": 3.622,
"eval_steps_per_second": 0.453,
"eval_wer": 22.57888128316254,
"step": 2000
},
{
"epoch": 0.25,
"learning_rate": 4.7824e-06,
"loss": 0.2133,
"step": 2025
},
{
"epoch": 0.26,
"learning_rate": 4.7623999999999995e-06,
"loss": 0.2474,
"step": 2050
},
{
"epoch": 0.26,
"learning_rate": 4.7424e-06,
"loss": 0.2302,
"step": 2075
},
{
"epoch": 0.26,
"learning_rate": 4.7232e-06,
"loss": 0.3048,
"step": 2100
},
{
"epoch": 0.27,
"learning_rate": 4.7032e-06,
"loss": 0.2593,
"step": 2125
},
{
"epoch": 0.27,
"learning_rate": 4.6832e-06,
"loss": 0.2605,
"step": 2150
},
{
"epoch": 0.27,
"learning_rate": 4.6632000000000005e-06,
"loss": 0.2847,
"step": 2175
},
{
"epoch": 0.28,
"learning_rate": 4.643200000000001e-06,
"loss": 0.2519,
"step": 2200
},
{
"epoch": 0.28,
"learning_rate": 4.6232e-06,
"loss": 0.2576,
"step": 2225
},
{
"epoch": 0.28,
"learning_rate": 4.6032e-06,
"loss": 0.2261,
"step": 2250
},
{
"epoch": 0.28,
"learning_rate": 4.5832000000000004e-06,
"loss": 0.2777,
"step": 2275
},
{
"epoch": 0.29,
"learning_rate": 4.5632e-06,
"loss": 0.2586,
"step": 2300
},
{
"epoch": 0.29,
"learning_rate": 4.5432e-06,
"loss": 0.2482,
"step": 2325
},
{
"epoch": 0.29,
"learning_rate": 4.5232e-06,
"loss": 0.2068,
"step": 2350
},
{
"epoch": 0.3,
"learning_rate": 4.5032e-06,
"loss": 0.2333,
"step": 2375
},
{
"epoch": 0.3,
"learning_rate": 4.4832e-06,
"loss": 0.2451,
"step": 2400
},
{
"epoch": 0.3,
"learning_rate": 4.4632e-06,
"loss": 0.2199,
"step": 2425
},
{
"epoch": 0.31,
"learning_rate": 4.4432e-06,
"loss": 0.2519,
"step": 2450
},
{
"epoch": 0.31,
"learning_rate": 4.4232e-06,
"loss": 0.2314,
"step": 2475
},
{
"epoch": 0.31,
"learning_rate": 4.4032000000000005e-06,
"loss": 0.206,
"step": 2500
},
{
"epoch": 0.31,
"eval_loss": 0.29017817974090576,
"eval_runtime": 1826.6674,
"eval_samples_per_second": 3.608,
"eval_steps_per_second": 0.451,
"eval_wer": 19.79221515654745,
"step": 2500
},
{
"epoch": 0.32,
"learning_rate": 4.383200000000001e-06,
"loss": 0.2336,
"step": 2525
},
{
"epoch": 0.32,
"learning_rate": 4.3632e-06,
"loss": 0.255,
"step": 2550
},
{
"epoch": 0.32,
"learning_rate": 4.3432e-06,
"loss": 0.2523,
"step": 2575
},
{
"epoch": 0.33,
"learning_rate": 4.3232e-06,
"loss": 0.2916,
"step": 2600
},
{
"epoch": 0.33,
"learning_rate": 4.3032e-06,
"loss": 0.1934,
"step": 2625
},
{
"epoch": 0.33,
"learning_rate": 4.2832e-06,
"loss": 0.245,
"step": 2650
},
{
"epoch": 0.33,
"learning_rate": 4.2632e-06,
"loss": 0.2059,
"step": 2675
},
{
"epoch": 0.34,
"learning_rate": 4.2432e-06,
"loss": 0.2197,
"step": 2700
},
{
"epoch": 0.34,
"learning_rate": 4.2232e-06,
"loss": 0.2108,
"step": 2725
},
{
"epoch": 0.34,
"learning_rate": 4.2032e-06,
"loss": 0.2485,
"step": 2750
},
{
"epoch": 0.35,
"learning_rate": 4.1832e-06,
"loss": 0.201,
"step": 2775
},
{
"epoch": 0.35,
"learning_rate": 4.1632e-06,
"loss": 0.2343,
"step": 2800
},
{
"epoch": 0.35,
"learning_rate": 4.1432e-06,
"loss": 0.2088,
"step": 2825
},
{
"epoch": 0.36,
"learning_rate": 4.123200000000001e-06,
"loss": 0.2294,
"step": 2850
},
{
"epoch": 0.36,
"learning_rate": 4.1032e-06,
"loss": 0.2226,
"step": 2875
},
{
"epoch": 0.36,
"learning_rate": 4.0832e-06,
"loss": 0.1993,
"step": 2900
},
{
"epoch": 0.37,
"learning_rate": 4.0632e-06,
"loss": 0.2006,
"step": 2925
},
{
"epoch": 0.37,
"learning_rate": 4.0432e-06,
"loss": 0.2385,
"step": 2950
},
{
"epoch": 0.37,
"learning_rate": 4.0232e-06,
"loss": 0.2225,
"step": 2975
},
{
"epoch": 0.38,
"learning_rate": 4.0032e-06,
"loss": 0.2327,
"step": 3000
},
{
"epoch": 0.38,
"eval_loss": 0.2706596553325653,
"eval_runtime": 1809.8842,
"eval_samples_per_second": 3.642,
"eval_steps_per_second": 0.455,
"eval_wer": 18.935558345822027,
"step": 3000
},
{
"epoch": 0.38,
"learning_rate": 3.9832e-06,
"loss": 0.1988,
"step": 3025
},
{
"epoch": 0.38,
"learning_rate": 3.9632e-06,
"loss": 0.2117,
"step": 3050
},
{
"epoch": 0.38,
"learning_rate": 3.9432e-06,
"loss": 0.1887,
"step": 3075
},
{
"epoch": 0.39,
"learning_rate": 3.9232e-06,
"loss": 0.1975,
"step": 3100
},
{
"epoch": 0.39,
"learning_rate": 3.9032e-06,
"loss": 0.2138,
"step": 3125
},
{
"epoch": 0.39,
"learning_rate": 3.8832e-06,
"loss": 0.2082,
"step": 3150
},
{
"epoch": 0.4,
"learning_rate": 3.8632000000000006e-06,
"loss": 0.2365,
"step": 3175
},
{
"epoch": 0.4,
"learning_rate": 3.8432e-06,
"loss": 0.1919,
"step": 3200
},
{
"epoch": 0.4,
"learning_rate": 3.8232e-06,
"loss": 0.2334,
"step": 3225
},
{
"epoch": 0.41,
"learning_rate": 3.8032000000000003e-06,
"loss": 0.2743,
"step": 3250
},
{
"epoch": 0.41,
"learning_rate": 3.7831999999999996e-06,
"loss": 0.1992,
"step": 3275
},
{
"epoch": 1.0,
"learning_rate": 3.7632e-06,
"loss": 0.236,
"step": 3300
},
{
"epoch": 1.0,
"learning_rate": 3.7432e-06,
"loss": 0.2389,
"step": 3325
},
{
"epoch": 1.01,
"learning_rate": 3.7232e-06,
"loss": 0.2236,
"step": 3350
},
{
"epoch": 1.01,
"learning_rate": 3.7032e-06,
"loss": 0.2187,
"step": 3375
},
{
"epoch": 1.01,
"learning_rate": 3.6832e-06,
"loss": 0.2022,
"step": 3400
},
{
"epoch": 1.02,
"learning_rate": 3.6632000000000004e-06,
"loss": 0.1943,
"step": 3425
},
{
"epoch": 1.02,
"learning_rate": 3.6431999999999997e-06,
"loss": 0.1858,
"step": 3450
},
{
"epoch": 1.02,
"learning_rate": 3.6232e-06,
"loss": 0.1762,
"step": 3475
},
{
"epoch": 1.03,
"learning_rate": 3.6032e-06,
"loss": 0.1416,
"step": 3500
},
{
"epoch": 1.03,
"eval_loss": 0.25662127137184143,
"eval_runtime": 1825.0473,
"eval_samples_per_second": 3.611,
"eval_steps_per_second": 0.451,
"eval_wer": 17.69208959455628,
"step": 3500
},
{
"epoch": 1.03,
"learning_rate": 3.5832e-06,
"loss": 0.1777,
"step": 3525
},
{
"epoch": 1.03,
"learning_rate": 3.5632e-06,
"loss": 0.1934,
"step": 3550
},
{
"epoch": 1.03,
"learning_rate": 3.5432000000000002e-06,
"loss": 0.1566,
"step": 3575
},
{
"epoch": 1.04,
"learning_rate": 3.5232000000000004e-06,
"loss": 0.1635,
"step": 3600
},
{
"epoch": 1.04,
"learning_rate": 3.5031999999999998e-06,
"loss": 0.1687,
"step": 3625
},
{
"epoch": 1.04,
"learning_rate": 3.4832e-06,
"loss": 0.1216,
"step": 3650
},
{
"epoch": 1.05,
"learning_rate": 3.4632000000000006e-06,
"loss": 0.1197,
"step": 3675
},
{
"epoch": 1.05,
"learning_rate": 3.4432e-06,
"loss": 0.1202,
"step": 3700
},
{
"epoch": 1.05,
"learning_rate": 3.4232e-06,
"loss": 0.1162,
"step": 3725
},
{
"epoch": 1.06,
"learning_rate": 3.4032000000000003e-06,
"loss": 0.1157,
"step": 3750
},
{
"epoch": 1.06,
"learning_rate": 3.3831999999999996e-06,
"loss": 0.1333,
"step": 3775
},
{
"epoch": 1.06,
"learning_rate": 3.3632000000000003e-06,
"loss": 0.099,
"step": 3800
},
{
"epoch": 1.07,
"learning_rate": 3.3432000000000004e-06,
"loss": 0.1247,
"step": 3825
},
{
"epoch": 1.07,
"learning_rate": 3.3232e-06,
"loss": 0.112,
"step": 3850
},
{
"epoch": 1.07,
"learning_rate": 3.3032e-06,
"loss": 0.1092,
"step": 3875
},
{
"epoch": 1.08,
"learning_rate": 3.2832e-06,
"loss": 0.1422,
"step": 3900
},
{
"epoch": 1.08,
"learning_rate": 3.2632000000000004e-06,
"loss": 0.1294,
"step": 3925
},
{
"epoch": 1.08,
"learning_rate": 3.2432e-06,
"loss": 0.1185,
"step": 3950
},
{
"epoch": 1.08,
"learning_rate": 3.2232000000000003e-06,
"loss": 0.1345,
"step": 3975
},
{
"epoch": 1.09,
"learning_rate": 3.2032000000000005e-06,
"loss": 0.0998,
"step": 4000
},
{
"epoch": 1.09,
"eval_loss": 0.25507599115371704,
"eval_runtime": 1832.0695,
"eval_samples_per_second": 3.598,
"eval_steps_per_second": 0.45,
"eval_wer": 16.821256429989067,
"step": 4000
},
{
"epoch": 1.09,
"learning_rate": 3.1832e-06,
"loss": 0.1089,
"step": 4025
},
{
"epoch": 1.09,
"learning_rate": 3.1632e-06,
"loss": 0.1012,
"step": 4050
},
{
"epoch": 1.1,
"learning_rate": 3.1432000000000002e-06,
"loss": 0.1139,
"step": 4075
},
{
"epoch": 1.1,
"learning_rate": 3.1232e-06,
"loss": 0.1186,
"step": 4100
},
{
"epoch": 1.1,
"learning_rate": 3.104e-06,
"loss": 0.1189,
"step": 4125
},
{
"epoch": 1.11,
"learning_rate": 3.084e-06,
"loss": 0.0963,
"step": 4150
},
{
"epoch": 1.11,
"learning_rate": 3.0640000000000002e-06,
"loss": 0.1048,
"step": 4175
},
{
"epoch": 1.11,
"learning_rate": 3.044e-06,
"loss": 0.1172,
"step": 4200
},
{
"epoch": 1.12,
"learning_rate": 3.024e-06,
"loss": 0.1075,
"step": 4225
},
{
"epoch": 1.12,
"learning_rate": 3.0040000000000004e-06,
"loss": 0.1362,
"step": 4250
},
{
"epoch": 1.12,
"learning_rate": 2.984e-06,
"loss": 0.1227,
"step": 4275
},
{
"epoch": 1.13,
"learning_rate": 2.964e-06,
"loss": 0.1109,
"step": 4300
},
{
"epoch": 1.13,
"learning_rate": 2.944e-06,
"loss": 0.1078,
"step": 4325
},
{
"epoch": 1.13,
"learning_rate": 2.9240000000000003e-06,
"loss": 0.1169,
"step": 4350
},
{
"epoch": 1.13,
"learning_rate": 2.904e-06,
"loss": 0.0874,
"step": 4375
},
{
"epoch": 1.14,
"learning_rate": 2.8840000000000003e-06,
"loss": 0.0942,
"step": 4400
},
{
"epoch": 1.14,
"learning_rate": 2.864e-06,
"loss": 0.0842,
"step": 4425
},
{
"epoch": 1.14,
"learning_rate": 2.844e-06,
"loss": 0.0776,
"step": 4450
},
{
"epoch": 1.15,
"learning_rate": 2.824e-06,
"loss": 0.0973,
"step": 4475
},
{
"epoch": 1.15,
"learning_rate": 2.804e-06,
"loss": 0.095,
"step": 4500
},
{
"epoch": 1.15,
"eval_loss": 0.2510645389556885,
"eval_runtime": 1808.3002,
"eval_samples_per_second": 3.645,
"eval_steps_per_second": 0.456,
"eval_wer": 16.389890234517395,
"step": 4500
},
{
"epoch": 1.15,
"learning_rate": 2.7840000000000004e-06,
"loss": 0.0869,
"step": 4525
},
{
"epoch": 1.16,
"learning_rate": 2.764e-06,
"loss": 0.0864,
"step": 4550
},
{
"epoch": 1.16,
"learning_rate": 2.744e-06,
"loss": 0.1199,
"step": 4575
},
{
"epoch": 1.16,
"learning_rate": 2.724e-06,
"loss": 0.0972,
"step": 4600
},
{
"epoch": 1.17,
"learning_rate": 2.704e-06,
"loss": 0.0819,
"step": 4625
},
{
"epoch": 1.17,
"learning_rate": 2.684e-06,
"loss": 0.0767,
"step": 4650
},
{
"epoch": 1.17,
"learning_rate": 2.6640000000000002e-06,
"loss": 0.088,
"step": 4675
},
{
"epoch": 1.18,
"learning_rate": 2.644e-06,
"loss": 0.1127,
"step": 4700
},
{
"epoch": 1.18,
"learning_rate": 2.624e-06,
"loss": 0.0818,
"step": 4725
},
{
"epoch": 1.18,
"learning_rate": 2.604e-06,
"loss": 0.086,
"step": 4750
},
{
"epoch": 1.18,
"learning_rate": 2.5839999999999997e-06,
"loss": 0.095,
"step": 4775
},
{
"epoch": 1.19,
"learning_rate": 2.564e-06,
"loss": 0.0833,
"step": 4800
},
{
"epoch": 1.19,
"learning_rate": 2.544e-06,
"loss": 0.0818,
"step": 4825
},
{
"epoch": 1.19,
"learning_rate": 2.5240000000000003e-06,
"loss": 0.0801,
"step": 4850
},
{
"epoch": 1.2,
"learning_rate": 2.504e-06,
"loss": 0.0969,
"step": 4875
},
{
"epoch": 1.2,
"learning_rate": 2.484e-06,
"loss": 0.0883,
"step": 4900
},
{
"epoch": 1.2,
"learning_rate": 2.464e-06,
"loss": 0.0877,
"step": 4925
},
{
"epoch": 1.21,
"learning_rate": 2.444e-06,
"loss": 0.0964,
"step": 4950
},
{
"epoch": 1.21,
"learning_rate": 2.4240000000000004e-06,
"loss": 0.107,
"step": 4975
},
{
"epoch": 1.21,
"learning_rate": 2.404e-06,
"loss": 0.0971,
"step": 5000
},
{
"epoch": 1.21,
"eval_loss": 0.2415408343076706,
"eval_runtime": 1804.4355,
"eval_samples_per_second": 3.653,
"eval_steps_per_second": 0.457,
"eval_wer": 15.539309004009883,
"step": 5000
},
{
"epoch": 1.22,
"learning_rate": 2.384e-06,
"loss": 0.1077,
"step": 5025
},
{
"epoch": 1.22,
"learning_rate": 2.364e-06,
"loss": 0.0712,
"step": 5050
},
{
"epoch": 1.22,
"learning_rate": 2.344e-06,
"loss": 0.0749,
"step": 5075
},
{
"epoch": 1.23,
"learning_rate": 2.324e-06,
"loss": 0.0851,
"step": 5100
},
{
"epoch": 1.23,
"learning_rate": 2.3040000000000003e-06,
"loss": 0.0851,
"step": 5125
},
{
"epoch": 1.23,
"learning_rate": 2.284e-06,
"loss": 0.0782,
"step": 5150
},
{
"epoch": 1.23,
"learning_rate": 2.2640000000000003e-06,
"loss": 0.0795,
"step": 5175
},
{
"epoch": 1.24,
"learning_rate": 2.244e-06,
"loss": 0.0611,
"step": 5200
},
{
"epoch": 1.24,
"learning_rate": 2.224e-06,
"loss": 0.0752,
"step": 5225
},
{
"epoch": 1.24,
"learning_rate": 2.204e-06,
"loss": 0.0787,
"step": 5250
},
{
"epoch": 1.25,
"learning_rate": 2.184e-06,
"loss": 0.0717,
"step": 5275
},
{
"epoch": 1.25,
"learning_rate": 2.1640000000000004e-06,
"loss": 0.0748,
"step": 5300
},
{
"epoch": 1.25,
"learning_rate": 2.144e-06,
"loss": 0.0586,
"step": 5325
},
{
"epoch": 1.26,
"learning_rate": 2.124e-06,
"loss": 0.0719,
"step": 5350
},
{
"epoch": 1.26,
"learning_rate": 2.104e-06,
"loss": 0.0868,
"step": 5375
},
{
"epoch": 1.26,
"learning_rate": 2.084e-06,
"loss": 0.0631,
"step": 5400
},
{
"epoch": 1.27,
"learning_rate": 2.064e-06,
"loss": 0.0767,
"step": 5425
},
{
"epoch": 1.27,
"learning_rate": 2.0440000000000003e-06,
"loss": 0.0834,
"step": 5450
},
{
"epoch": 1.27,
"learning_rate": 2.024e-06,
"loss": 0.0767,
"step": 5475
},
{
"epoch": 1.28,
"learning_rate": 2.004e-06,
"loss": 0.0964,
"step": 5500
},
{
"epoch": 1.28,
"eval_loss": 0.2336428314447403,
"eval_runtime": 1826.0414,
"eval_samples_per_second": 3.609,
"eval_steps_per_second": 0.451,
"eval_wer": 15.170723804123293,
"step": 5500
},
{
"epoch": 1.28,
"learning_rate": 1.984e-06,
"loss": 0.0813,
"step": 5525
},
{
"epoch": 1.28,
"learning_rate": 1.9639999999999997e-06,
"loss": 0.0517,
"step": 5550
},
{
"epoch": 1.28,
"learning_rate": 1.944e-06,
"loss": 0.079,
"step": 5575
},
{
"epoch": 1.29,
"learning_rate": 1.924e-06,
"loss": 0.0787,
"step": 5600
},
{
"epoch": 1.29,
"learning_rate": 1.9040000000000001e-06,
"loss": 0.0537,
"step": 5625
},
{
"epoch": 1.29,
"learning_rate": 1.884e-06,
"loss": 0.0696,
"step": 5650
},
{
"epoch": 1.3,
"learning_rate": 1.8639999999999999e-06,
"loss": 0.0737,
"step": 5675
},
{
"epoch": 1.3,
"learning_rate": 1.844e-06,
"loss": 0.0698,
"step": 5700
},
{
"epoch": 1.3,
"learning_rate": 1.824e-06,
"loss": 0.0831,
"step": 5725
},
{
"epoch": 1.31,
"learning_rate": 1.8040000000000002e-06,
"loss": 0.0919,
"step": 5750
},
{
"epoch": 1.31,
"learning_rate": 1.784e-06,
"loss": 0.0846,
"step": 5775
},
{
"epoch": 1.31,
"learning_rate": 1.764e-06,
"loss": 0.0653,
"step": 5800
},
{
"epoch": 1.32,
"learning_rate": 1.7440000000000002e-06,
"loss": 0.0676,
"step": 5825
},
{
"epoch": 1.32,
"learning_rate": 1.724e-06,
"loss": 0.0559,
"step": 5850
},
{
"epoch": 1.32,
"learning_rate": 1.704e-06,
"loss": 0.0659,
"step": 5875
},
{
"epoch": 1.33,
"learning_rate": 1.684e-06,
"loss": 0.0693,
"step": 5900
},
{
"epoch": 1.33,
"learning_rate": 1.6639999999999999e-06,
"loss": 0.0582,
"step": 5925
},
{
"epoch": 1.33,
"learning_rate": 1.6440000000000003e-06,
"loss": 0.1016,
"step": 5950
},
{
"epoch": 1.33,
"learning_rate": 1.624e-06,
"loss": 0.064,
"step": 5975
},
{
"epoch": 1.34,
"learning_rate": 1.604e-06,
"loss": 0.072,
"step": 6000
},
{
"epoch": 1.34,
"eval_loss": 0.23533816635608673,
"eval_runtime": 1816.0058,
"eval_samples_per_second": 3.629,
"eval_steps_per_second": 0.454,
"eval_wer": 14.75960954271133,
"step": 6000
},
{
"epoch": 1.34,
"learning_rate": 1.5840000000000002e-06,
"loss": 0.0494,
"step": 6025
},
{
"epoch": 1.34,
"learning_rate": 1.564e-06,
"loss": 0.0647,
"step": 6050
},
{
"epoch": 1.35,
"learning_rate": 1.5440000000000002e-06,
"loss": 0.073,
"step": 6075
},
{
"epoch": 1.35,
"learning_rate": 1.5240000000000001e-06,
"loss": 0.0758,
"step": 6100
},
{
"epoch": 1.35,
"learning_rate": 1.504e-06,
"loss": 0.0473,
"step": 6125
},
{
"epoch": 1.36,
"learning_rate": 1.484e-06,
"loss": 0.0645,
"step": 6150
},
{
"epoch": 1.36,
"learning_rate": 1.464e-06,
"loss": 0.0544,
"step": 6175
},
{
"epoch": 1.36,
"learning_rate": 1.444e-06,
"loss": 0.0674,
"step": 6200
},
{
"epoch": 1.37,
"learning_rate": 1.424e-06,
"loss": 0.0721,
"step": 6225
},
{
"epoch": 1.37,
"learning_rate": 1.404e-06,
"loss": 0.0668,
"step": 6250
},
{
"epoch": 1.37,
"learning_rate": 1.384e-06,
"loss": 0.0587,
"step": 6275
},
{
"epoch": 1.38,
"learning_rate": 1.364e-06,
"loss": 0.0494,
"step": 6300
},
{
"epoch": 1.38,
"learning_rate": 1.344e-06,
"loss": 0.0802,
"step": 6325
},
{
"epoch": 1.38,
"learning_rate": 1.3240000000000002e-06,
"loss": 0.0636,
"step": 6350
},
{
"epoch": 1.38,
"learning_rate": 1.304e-06,
"loss": 0.0499,
"step": 6375
},
{
"epoch": 1.39,
"learning_rate": 1.284e-06,
"loss": 0.0666,
"step": 6400
},
{
"epoch": 1.39,
"learning_rate": 1.264e-06,
"loss": 0.0508,
"step": 6425
},
{
"epoch": 1.39,
"learning_rate": 1.244e-06,
"loss": 0.0472,
"step": 6450
},
{
"epoch": 1.4,
"learning_rate": 1.224e-06,
"loss": 0.0643,
"step": 6475
},
{
"epoch": 1.4,
"learning_rate": 1.204e-06,
"loss": 0.0658,
"step": 6500
},
{
"epoch": 1.4,
"eval_loss": 0.23401623964309692,
"eval_runtime": 1798.9797,
"eval_samples_per_second": 3.664,
"eval_steps_per_second": 0.458,
"eval_wer": 14.676576613066548,
"step": 6500
},
{
"epoch": 1.4,
"learning_rate": 1.1848e-06,
"loss": 0.0671,
"step": 6525
},
{
"epoch": 1.41,
"learning_rate": 1.1648e-06,
"loss": 0.0707,
"step": 6550
},
{
"epoch": 1.41,
"learning_rate": 1.1448e-06,
"loss": 0.0723,
"step": 6575
},
{
"epoch": 2.0,
"learning_rate": 1.1248e-06,
"loss": 0.0625,
"step": 6600
},
{
"epoch": 2.0,
"learning_rate": 1.1048e-06,
"loss": 0.0843,
"step": 6625
},
{
"epoch": 2.01,
"learning_rate": 1.0848e-06,
"loss": 0.087,
"step": 6650
},
{
"epoch": 2.01,
"learning_rate": 1.0648e-06,
"loss": 0.0561,
"step": 6675
},
{
"epoch": 2.01,
"learning_rate": 1.0448e-06,
"loss": 0.063,
"step": 6700
},
{
"epoch": 2.02,
"learning_rate": 1.0248000000000001e-06,
"loss": 0.0596,
"step": 6725
},
{
"epoch": 2.02,
"learning_rate": 1.0048e-06,
"loss": 0.0506,
"step": 6750
},
{
"epoch": 2.02,
"learning_rate": 9.848e-07,
"loss": 0.0526,
"step": 6775
},
{
"epoch": 2.03,
"learning_rate": 9.648e-07,
"loss": 0.0479,
"step": 6800
},
{
"epoch": 2.03,
"learning_rate": 9.448e-07,
"loss": 0.0494,
"step": 6825
},
{
"epoch": 2.03,
"learning_rate": 9.248000000000001e-07,
"loss": 0.0564,
"step": 6850
},
{
"epoch": 2.04,
"learning_rate": 9.048e-07,
"loss": 0.0499,
"step": 6875
},
{
"epoch": 2.04,
"learning_rate": 8.848e-07,
"loss": 0.0593,
"step": 6900
},
{
"epoch": 2.04,
"learning_rate": 8.648000000000001e-07,
"loss": 0.049,
"step": 6925
},
{
"epoch": 2.04,
"learning_rate": 8.448e-07,
"loss": 0.0527,
"step": 6950
},
{
"epoch": 2.05,
"learning_rate": 8.247999999999999e-07,
"loss": 0.0283,
"step": 6975
},
{
"epoch": 2.05,
"learning_rate": 8.048e-07,
"loss": 0.033,
"step": 7000
},
{
"epoch": 2.05,
"eval_loss": 0.2349175214767456,
"eval_runtime": 1811.228,
"eval_samples_per_second": 3.639,
"eval_steps_per_second": 0.455,
"eval_wer": 14.376847988982949,
"step": 7000
},
{
"epoch": 2.05,
"learning_rate": 7.848e-07,
"loss": 0.033,
"step": 7025
},
{
"epoch": 2.06,
"learning_rate": 7.648000000000001e-07,
"loss": 0.0397,
"step": 7050
},
{
"epoch": 2.06,
"learning_rate": 7.448e-07,
"loss": 0.0271,
"step": 7075
},
{
"epoch": 2.06,
"learning_rate": 7.248e-07,
"loss": 0.0404,
"step": 7100
},
{
"epoch": 2.07,
"learning_rate": 7.048e-07,
"loss": 0.0348,
"step": 7125
},
{
"epoch": 2.07,
"learning_rate": 6.848e-07,
"loss": 0.0279,
"step": 7150
},
{
"epoch": 2.07,
"learning_rate": 6.648e-07,
"loss": 0.0296,
"step": 7175
},
{
"epoch": 2.08,
"learning_rate": 6.448000000000001e-07,
"loss": 0.0473,
"step": 7200
},
{
"epoch": 2.08,
"learning_rate": 6.247999999999999e-07,
"loss": 0.0287,
"step": 7225
},
{
"epoch": 2.08,
"learning_rate": 6.048e-07,
"loss": 0.0377,
"step": 7250
},
{
"epoch": 2.09,
"learning_rate": 5.848e-07,
"loss": 0.042,
"step": 7275
},
{
"epoch": 2.09,
"learning_rate": 5.648e-07,
"loss": 0.0375,
"step": 7300
},
{
"epoch": 2.09,
"learning_rate": 5.448000000000001e-07,
"loss": 0.0367,
"step": 7325
},
{
"epoch": 2.09,
"learning_rate": 5.248e-07,
"loss": 0.0398,
"step": 7350
},
{
"epoch": 2.1,
"learning_rate": 5.048e-07,
"loss": 0.0381,
"step": 7375
},
{
"epoch": 2.1,
"learning_rate": 4.848e-07,
"loss": 0.0266,
"step": 7400
},
{
"epoch": 2.1,
"learning_rate": 4.6480000000000003e-07,
"loss": 0.0321,
"step": 7425
},
{
"epoch": 2.11,
"learning_rate": 4.4479999999999996e-07,
"loss": 0.0351,
"step": 7450
},
{
"epoch": 2.11,
"learning_rate": 4.2480000000000005e-07,
"loss": 0.0359,
"step": 7475
},
{
"epoch": 2.11,
"learning_rate": 4.0479999999999997e-07,
"loss": 0.0288,
"step": 7500
},
{
"epoch": 2.11,
"eval_loss": 0.23708966374397278,
"eval_runtime": 1812.1644,
"eval_samples_per_second": 3.637,
"eval_steps_per_second": 0.455,
"eval_wer": 14.186479808821742,
"step": 7500
},
{
"epoch": 2.12,
"learning_rate": 3.848e-07,
"loss": 0.0363,
"step": 7525
},
{
"epoch": 2.12,
"learning_rate": 3.648e-07,
"loss": 0.0351,
"step": 7550
},
{
"epoch": 2.12,
"learning_rate": 3.448e-07,
"loss": 0.029,
"step": 7575
},
{
"epoch": 2.13,
"learning_rate": 3.248e-07,
"loss": 0.0381,
"step": 7600
},
{
"epoch": 2.13,
"learning_rate": 3.048e-07,
"loss": 0.0299,
"step": 7625
},
{
"epoch": 2.13,
"learning_rate": 2.848e-07,
"loss": 0.0334,
"step": 7650
},
{
"epoch": 2.14,
"learning_rate": 2.648e-07,
"loss": 0.0273,
"step": 7675
},
{
"epoch": 2.14,
"learning_rate": 2.448e-07,
"loss": 0.0357,
"step": 7700
},
{
"epoch": 2.14,
"learning_rate": 2.2480000000000003e-07,
"loss": 0.0386,
"step": 7725
},
{
"epoch": 2.14,
"learning_rate": 2.048e-07,
"loss": 0.0307,
"step": 7750
},
{
"epoch": 2.15,
"learning_rate": 1.8480000000000001e-07,
"loss": 0.0319,
"step": 7775
},
{
"epoch": 2.15,
"learning_rate": 1.648e-07,
"loss": 0.0197,
"step": 7800
},
{
"epoch": 2.15,
"learning_rate": 1.448e-07,
"loss": 0.0328,
"step": 7825
},
{
"epoch": 2.16,
"learning_rate": 1.248e-07,
"loss": 0.0317,
"step": 7850
},
{
"epoch": 2.16,
"learning_rate": 1.048e-07,
"loss": 0.0264,
"step": 7875
},
{
"epoch": 2.16,
"learning_rate": 8.48e-08,
"loss": 0.029,
"step": 7900
},
{
"epoch": 2.17,
"learning_rate": 6.480000000000001e-08,
"loss": 0.0224,
"step": 7925
},
{
"epoch": 2.17,
"learning_rate": 4.48e-08,
"loss": 0.0297,
"step": 7950
},
{
"epoch": 2.17,
"learning_rate": 2.48e-08,
"loss": 0.0337,
"step": 7975
},
{
"epoch": 2.18,
"learning_rate": 4.8e-09,
"loss": 0.0352,
"step": 8000
},
{
"epoch": 2.18,
"eval_loss": 0.23755376040935516,
"eval_runtime": 1811.3928,
"eval_samples_per_second": 3.639,
"eval_steps_per_second": 0.455,
"eval_wer": 14.119648426424725,
"step": 8000
},
{
"epoch": 2.18,
"step": 8000,
"total_flos": 3.265527462100992e+19,
"train_loss": 0.191678307980299,
"train_runtime": 39548.417,
"train_samples_per_second": 0.809,
"train_steps_per_second": 0.202
}
],
"max_steps": 8000,
"num_train_epochs": 9223372036854775807,
"total_flos": 3.265527462100992e+19,
"trial_name": null,
"trial_params": null
}