{ "best_metric": null, "best_model_checkpoint": null, "epoch": 24.449877750611247, "eval_steps": 500, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.24, "learning_rate": 9.4e-06, "loss": 0.3047, "step": 50 }, { "epoch": 0.49, "learning_rate": 9.905050505050506e-06, "loss": 0.179, "step": 100 }, { "epoch": 0.73, "learning_rate": 9.804040404040405e-06, "loss": 0.1628, "step": 150 }, { "epoch": 0.98, "learning_rate": 9.703030303030305e-06, "loss": 0.1468, "step": 200 }, { "epoch": 1.22, "learning_rate": 9.602020202020203e-06, "loss": 0.0966, "step": 250 }, { "epoch": 1.47, "learning_rate": 9.501010101010102e-06, "loss": 0.0854, "step": 300 }, { "epoch": 1.71, "learning_rate": 9.4e-06, "loss": 0.0881, "step": 350 }, { "epoch": 1.96, "learning_rate": 9.2989898989899e-06, "loss": 0.0841, "step": 400 }, { "epoch": 2.2, "learning_rate": 9.197979797979799e-06, "loss": 0.0541, "step": 450 }, { "epoch": 2.44, "learning_rate": 9.096969696969698e-06, "loss": 0.0523, "step": 500 }, { "epoch": 2.44, "eval_loss": 0.21230381727218628, "eval_runtime": 1923.3381, "eval_samples_per_second": 1.505, "eval_steps_per_second": 0.188, "eval_wer": 0.2664437484127656, "step": 500 }, { "epoch": 2.69, "learning_rate": 8.995959595959598e-06, "loss": 0.0502, "step": 550 }, { "epoch": 2.93, "learning_rate": 8.896969696969697e-06, "loss": 0.0468, "step": 600 }, { "epoch": 3.18, "learning_rate": 8.795959595959596e-06, "loss": 0.0328, "step": 650 }, { "epoch": 3.42, "learning_rate": 8.694949494949496e-06, "loss": 0.0273, "step": 700 }, { "epoch": 3.67, "learning_rate": 8.593939393939395e-06, "loss": 0.0283, "step": 750 }, { "epoch": 3.91, "learning_rate": 8.492929292929295e-06, "loss": 0.0316, "step": 800 }, { "epoch": 4.16, "learning_rate": 8.391919191919192e-06, "loss": 0.0194, "step": 850 }, { "epoch": 4.4, "learning_rate": 8.290909090909092e-06, "loss": 0.0151, "step": 900 }, { "epoch": 4.65, "learning_rate": 8.18989898989899e-06, "loss": 0.0164, "step": 950 }, { "epoch": 4.89, "learning_rate": 8.08888888888889e-06, "loss": 0.0187, "step": 1000 }, { "epoch": 4.89, "eval_loss": 0.22370614111423492, "eval_runtime": 1885.8595, "eval_samples_per_second": 1.535, "eval_steps_per_second": 0.192, "eval_wer": 0.23702700414797256, "step": 1000 }, { "epoch": 5.13, "learning_rate": 7.987878787878789e-06, "loss": 0.0122, "step": 1050 }, { "epoch": 5.38, "learning_rate": 7.886868686868686e-06, "loss": 0.0104, "step": 1100 }, { "epoch": 5.62, "learning_rate": 7.785858585858586e-06, "loss": 0.0102, "step": 1150 }, { "epoch": 5.87, "learning_rate": 7.684848484848485e-06, "loss": 0.0124, "step": 1200 }, { "epoch": 6.11, "learning_rate": 7.583838383838384e-06, "loss": 0.0084, "step": 1250 }, { "epoch": 6.36, "learning_rate": 7.4828282828282835e-06, "loss": 0.0067, "step": 1300 }, { "epoch": 6.6, "learning_rate": 7.381818181818182e-06, "loss": 0.0067, "step": 1350 }, { "epoch": 6.85, "learning_rate": 7.280808080808082e-06, "loss": 0.0074, "step": 1400 }, { "epoch": 7.09, "learning_rate": 7.17979797979798e-06, "loss": 0.0059, "step": 1450 }, { "epoch": 7.33, "learning_rate": 7.07878787878788e-06, "loss": 0.0041, "step": 1500 }, { "epoch": 7.33, "eval_loss": 0.26467418670654297, "eval_runtime": 1880.1582, "eval_samples_per_second": 1.539, "eval_steps_per_second": 0.193, "eval_wer": 0.23097435029205113, "step": 1500 }, { "epoch": 7.58, "learning_rate": 6.977777777777779e-06, "loss": 0.0038, "step": 1550 }, { "epoch": 7.82, "learning_rate": 6.876767676767677e-06, "loss": 0.005, "step": 1600 }, { "epoch": 8.07, "learning_rate": 6.7757575757575765e-06, "loss": 0.0035, "step": 1650 }, { "epoch": 8.31, "learning_rate": 6.674747474747475e-06, "loss": 0.0034, "step": 1700 }, { "epoch": 8.56, "learning_rate": 6.5737373737373746e-06, "loss": 0.0034, "step": 1750 }, { "epoch": 8.8, "learning_rate": 6.472727272727272e-06, "loss": 0.0045, "step": 1800 }, { "epoch": 9.05, "learning_rate": 6.371717171717172e-06, "loss": 0.0034, "step": 1850 }, { "epoch": 9.29, "learning_rate": 6.270707070707071e-06, "loss": 0.0025, "step": 1900 }, { "epoch": 9.54, "learning_rate": 6.16969696969697e-06, "loss": 0.0038, "step": 1950 }, { "epoch": 9.78, "learning_rate": 6.068686868686869e-06, "loss": 0.0028, "step": 2000 }, { "epoch": 9.78, "eval_loss": 0.29039227962493896, "eval_runtime": 1891.4656, "eval_samples_per_second": 1.53, "eval_steps_per_second": 0.191, "eval_wer": 0.23436045035130787, "step": 2000 }, { "epoch": 10.02, "learning_rate": 5.967676767676768e-06, "loss": 0.0021, "step": 2050 }, { "epoch": 10.27, "learning_rate": 5.8666666666666675e-06, "loss": 0.0015, "step": 2100 }, { "epoch": 10.51, "learning_rate": 5.765656565656567e-06, "loss": 0.0018, "step": 2150 }, { "epoch": 10.76, "learning_rate": 5.664646464646465e-06, "loss": 0.0019, "step": 2200 }, { "epoch": 11.0, "learning_rate": 5.563636363636364e-06, "loss": 0.0023, "step": 2250 }, { "epoch": 11.25, "learning_rate": 5.462626262626263e-06, "loss": 0.0015, "step": 2300 }, { "epoch": 11.49, "learning_rate": 5.361616161616162e-06, "loss": 0.0018, "step": 2350 }, { "epoch": 11.74, "learning_rate": 5.26060606060606e-06, "loss": 0.002, "step": 2400 }, { "epoch": 11.98, "learning_rate": 5.15959595959596e-06, "loss": 0.0014, "step": 2450 }, { "epoch": 12.22, "learning_rate": 5.058585858585859e-06, "loss": 0.0015, "step": 2500 }, { "epoch": 12.22, "eval_loss": 0.29083308577537537, "eval_runtime": 1888.3025, "eval_samples_per_second": 1.533, "eval_steps_per_second": 0.192, "eval_wer": 0.2268263777194616, "step": 2500 }, { "epoch": 12.47, "learning_rate": 4.957575757575758e-06, "loss": 0.0012, "step": 2550 }, { "epoch": 12.71, "learning_rate": 4.856565656565657e-06, "loss": 0.0012, "step": 2600 }, { "epoch": 12.96, "learning_rate": 4.755555555555556e-06, "loss": 0.001, "step": 2650 }, { "epoch": 13.2, "learning_rate": 4.654545454545455e-06, "loss": 0.0008, "step": 2700 }, { "epoch": 13.45, "learning_rate": 4.553535353535354e-06, "loss": 0.0007, "step": 2750 }, { "epoch": 13.69, "learning_rate": 4.452525252525253e-06, "loss": 0.0004, "step": 2800 }, { "epoch": 13.94, "learning_rate": 4.351515151515152e-06, "loss": 0.0005, "step": 2850 }, { "epoch": 14.18, "learning_rate": 4.250505050505051e-06, "loss": 0.0004, "step": 2900 }, { "epoch": 14.43, "learning_rate": 4.14949494949495e-06, "loss": 0.0004, "step": 2950 }, { "epoch": 14.67, "learning_rate": 4.048484848484849e-06, "loss": 0.0003, "step": 3000 }, { "epoch": 14.67, "eval_loss": 0.3021999001502991, "eval_runtime": 1889.0309, "eval_samples_per_second": 1.532, "eval_steps_per_second": 0.192, "eval_wer": 0.21967324134428173, "step": 3000 }, { "epoch": 14.91, "learning_rate": 3.9474747474747474e-06, "loss": 0.0003, "step": 3050 }, { "epoch": 15.16, "learning_rate": 3.846464646464647e-06, "loss": 0.0002, "step": 3100 }, { "epoch": 15.4, "learning_rate": 3.745454545454546e-06, "loss": 0.0001, "step": 3150 }, { "epoch": 15.65, "learning_rate": 3.644444444444445e-06, "loss": 0.0004, "step": 3200 }, { "epoch": 15.89, "learning_rate": 3.5434343434343437e-06, "loss": 0.0001, "step": 3250 }, { "epoch": 16.14, "learning_rate": 3.4424242424242427e-06, "loss": 0.0001, "step": 3300 }, { "epoch": 16.38, "learning_rate": 3.3414141414141413e-06, "loss": 0.0001, "step": 3350 }, { "epoch": 16.63, "learning_rate": 3.2404040404040404e-06, "loss": 0.0001, "step": 3400 }, { "epoch": 16.87, "learning_rate": 3.13939393939394e-06, "loss": 0.0001, "step": 3450 }, { "epoch": 17.11, "learning_rate": 3.038383838383839e-06, "loss": 0.0003, "step": 3500 }, { "epoch": 17.11, "eval_loss": 0.3248833417892456, "eval_runtime": 1885.9913, "eval_samples_per_second": 1.534, "eval_steps_per_second": 0.192, "eval_wer": 0.2195462625920596, "step": 3500 }, { "epoch": 17.36, "learning_rate": 2.9373737373737376e-06, "loss": 0.0001, "step": 3550 }, { "epoch": 17.6, "learning_rate": 2.8363636363636366e-06, "loss": 0.0004, "step": 3600 }, { "epoch": 17.85, "learning_rate": 2.7353535353535353e-06, "loss": 0.0001, "step": 3650 }, { "epoch": 18.09, "learning_rate": 2.6343434343434343e-06, "loss": 0.0001, "step": 3700 }, { "epoch": 18.34, "learning_rate": 2.5333333333333338e-06, "loss": 0.0002, "step": 3750 }, { "epoch": 18.58, "learning_rate": 2.432323232323233e-06, "loss": 0.0001, "step": 3800 }, { "epoch": 18.83, "learning_rate": 2.3313131313131315e-06, "loss": 0.0002, "step": 3850 }, { "epoch": 19.07, "learning_rate": 2.2303030303030305e-06, "loss": 0.0001, "step": 3900 }, { "epoch": 19.32, "learning_rate": 2.1292929292929296e-06, "loss": 0.0001, "step": 3950 }, { "epoch": 19.56, "learning_rate": 2.0282828282828286e-06, "loss": 0.0003, "step": 4000 }, { "epoch": 19.56, "eval_loss": 0.3216637372970581, "eval_runtime": 1889.5891, "eval_samples_per_second": 1.532, "eval_steps_per_second": 0.192, "eval_wer": 0.21611783628206213, "step": 4000 }, { "epoch": 19.8, "learning_rate": 1.9272727272727273e-06, "loss": 0.0001, "step": 4050 }, { "epoch": 20.05, "learning_rate": 1.8262626262626265e-06, "loss": 0.0001, "step": 4100 }, { "epoch": 20.29, "learning_rate": 1.7252525252525254e-06, "loss": 0.0002, "step": 4150 }, { "epoch": 20.54, "learning_rate": 1.6242424242424242e-06, "loss": 0.0001, "step": 4200 }, { "epoch": 20.78, "learning_rate": 1.5232323232323235e-06, "loss": 0.0, "step": 4250 }, { "epoch": 21.03, "learning_rate": 1.4222222222222223e-06, "loss": 0.0, "step": 4300 }, { "epoch": 21.27, "learning_rate": 1.3212121212121212e-06, "loss": 0.0, "step": 4350 }, { "epoch": 21.52, "learning_rate": 1.2202020202020202e-06, "loss": 0.0, "step": 4400 }, { "epoch": 21.76, "learning_rate": 1.1191919191919193e-06, "loss": 0.0, "step": 4450 }, { "epoch": 22.0, "learning_rate": 1.0181818181818183e-06, "loss": 0.0, "step": 4500 }, { "epoch": 22.0, "eval_loss": 0.3335433900356293, "eval_runtime": 1886.0688, "eval_samples_per_second": 1.534, "eval_steps_per_second": 0.192, "eval_wer": 0.21814949631761618, "step": 4500 }, { "epoch": 22.25, "learning_rate": 9.171717171717172e-07, "loss": 0.0, "step": 4550 }, { "epoch": 22.49, "learning_rate": 8.161616161616162e-07, "loss": 0.0, "step": 4600 }, { "epoch": 22.74, "learning_rate": 7.151515151515153e-07, "loss": 0.0, "step": 4650 }, { "epoch": 22.98, "learning_rate": 6.141414141414142e-07, "loss": 0.0, "step": 4700 }, { "epoch": 23.23, "learning_rate": 5.131313131313132e-07, "loss": 0.0, "step": 4750 }, { "epoch": 23.47, "learning_rate": 4.121212121212122e-07, "loss": 0.0, "step": 4800 }, { "epoch": 23.72, "learning_rate": 3.111111111111111e-07, "loss": 0.0, "step": 4850 }, { "epoch": 23.96, "learning_rate": 2.1010101010101013e-07, "loss": 0.0, "step": 4900 }, { "epoch": 24.21, "learning_rate": 1.090909090909091e-07, "loss": 0.0, "step": 4950 }, { "epoch": 24.45, "learning_rate": 8.080808080808081e-09, "loss": 0.0, "step": 5000 }, { "epoch": 24.45, "eval_loss": 0.33888712525367737, "eval_runtime": 1887.2294, "eval_samples_per_second": 1.533, "eval_steps_per_second": 0.192, "eval_wer": 0.21857275882502328, "step": 5000 }, { "epoch": 24.45, "step": 5000, "total_flos": 3.395035389100032e+20, "train_loss": 0.016903733740281313, "train_runtime": 68439.6404, "train_samples_per_second": 2.338, "train_steps_per_second": 0.073 } ], "logging_steps": 50, "max_steps": 5000, "num_train_epochs": 25, "save_steps": 500, "total_flos": 3.395035389100032e+20, "trial_name": null, "trial_params": null }