{ "best_metric": 8.60641891891892, "best_model_checkpoint": "./checkpoint-9000", "epoch": 10.97694840834248, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 1.94e-06, "loss": 0.8124, "step": 100 }, { "epoch": 0.22, "learning_rate": 3.94e-06, "loss": 0.3235, "step": 200 }, { "epoch": 0.33, "learning_rate": 5.94e-06, "loss": 0.2396, "step": 300 }, { "epoch": 0.44, "learning_rate": 7.94e-06, "loss": 0.2138, "step": 400 }, { "epoch": 0.55, "learning_rate": 9.940000000000001e-06, "loss": 0.1972, "step": 500 }, { "epoch": 0.66, "learning_rate": 9.897894736842107e-06, "loss": 0.1885, "step": 600 }, { "epoch": 0.77, "learning_rate": 9.79263157894737e-06, "loss": 0.1792, "step": 700 }, { "epoch": 0.88, "learning_rate": 9.687368421052632e-06, "loss": 0.174, "step": 800 }, { "epoch": 0.99, "learning_rate": 9.582105263157897e-06, "loss": 0.1669, "step": 900 }, { "epoch": 1.1, "learning_rate": 9.476842105263158e-06, "loss": 0.1106, "step": 1000 }, { "epoch": 1.1, "eval_cer": 6.4784271464430745, "eval_loss": 0.18272116780281067, "eval_runtime": 4332.708, "eval_samples_per_second": 1.063, "eval_steps_per_second": 0.066, "eval_wer": 10.347972972972972, "step": 1000 }, { "epoch": 1.21, "learning_rate": 9.371578947368421e-06, "loss": 0.1083, "step": 1100 }, { "epoch": 1.32, "learning_rate": 9.266315789473685e-06, "loss": 0.1043, "step": 1200 }, { "epoch": 1.43, "learning_rate": 9.161052631578948e-06, "loss": 0.1016, "step": 1300 }, { "epoch": 1.54, "learning_rate": 9.055789473684211e-06, "loss": 0.1023, "step": 1400 }, { "epoch": 1.65, "learning_rate": 8.950526315789474e-06, "loss": 0.101, "step": 1500 }, { "epoch": 1.76, "learning_rate": 8.845263157894738e-06, "loss": 0.099, "step": 1600 }, { "epoch": 1.87, "learning_rate": 8.740000000000001e-06, "loss": 0.1005, "step": 1700 }, { "epoch": 1.98, "learning_rate": 8.634736842105264e-06, "loss": 0.1002, "step": 1800 }, { "epoch": 2.09, "learning_rate": 8.529473684210528e-06, "loss": 0.0601, "step": 1900 }, { "epoch": 2.2, "learning_rate": 8.42421052631579e-06, "loss": 0.0487, "step": 2000 }, { "epoch": 2.2, "eval_cer": 5.912657213073819, "eval_loss": 0.17993855476379395, "eval_runtime": 4312.9082, "eval_samples_per_second": 1.067, "eval_steps_per_second": 0.067, "eval_wer": 9.47635135135135, "step": 2000 }, { "epoch": 2.31, "learning_rate": 8.318947368421052e-06, "loss": 0.0508, "step": 2100 }, { "epoch": 2.41, "learning_rate": 8.213684210526316e-06, "loss": 0.0501, "step": 2200 }, { "epoch": 2.52, "learning_rate": 8.10842105263158e-06, "loss": 0.0506, "step": 2300 }, { "epoch": 2.63, "learning_rate": 8.003157894736842e-06, "loss": 0.0498, "step": 2400 }, { "epoch": 2.74, "learning_rate": 7.897894736842106e-06, "loss": 0.0486, "step": 2500 }, { "epoch": 2.85, "learning_rate": 7.792631578947369e-06, "loss": 0.0513, "step": 2600 }, { "epoch": 2.96, "learning_rate": 7.687368421052632e-06, "loss": 0.0504, "step": 2700 }, { "epoch": 3.07, "learning_rate": 7.582105263157895e-06, "loss": 0.0307, "step": 2800 }, { "epoch": 3.18, "learning_rate": 7.476842105263159e-06, "loss": 0.0239, "step": 2900 }, { "epoch": 3.29, "learning_rate": 7.371578947368422e-06, "loss": 0.0243, "step": 3000 }, { "epoch": 3.29, "eval_cer": 5.806862835289325, "eval_loss": 0.19502821564674377, "eval_runtime": 4299.7561, "eval_samples_per_second": 1.071, "eval_steps_per_second": 0.067, "eval_wer": 9.21114864864865, "step": 3000 }, { "epoch": 3.4, "learning_rate": 7.266315789473684e-06, "loss": 0.024, "step": 3100 }, { "epoch": 3.51, "learning_rate": 7.161052631578948e-06, "loss": 0.0252, "step": 3200 }, { "epoch": 3.62, "learning_rate": 7.055789473684212e-06, "loss": 0.0244, "step": 3300 }, { "epoch": 3.73, "learning_rate": 6.950526315789474e-06, "loss": 0.0249, "step": 3400 }, { "epoch": 3.84, "learning_rate": 6.845263157894737e-06, "loss": 0.0247, "step": 3500 }, { "epoch": 3.95, "learning_rate": 6.740000000000001e-06, "loss": 0.0245, "step": 3600 }, { "epoch": 4.06, "learning_rate": 6.634736842105264e-06, "loss": 0.0173, "step": 3700 }, { "epoch": 4.17, "learning_rate": 6.529473684210526e-06, "loss": 0.0106, "step": 3800 }, { "epoch": 4.28, "learning_rate": 6.42421052631579e-06, "loss": 0.0116, "step": 3900 }, { "epoch": 4.39, "learning_rate": 6.318947368421054e-06, "loss": 0.0106, "step": 4000 }, { "epoch": 4.39, "eval_cer": 5.575560841623846, "eval_loss": 0.21126572787761688, "eval_runtime": 4218.4134, "eval_samples_per_second": 1.091, "eval_steps_per_second": 0.068, "eval_wer": 8.971283783783784, "step": 4000 }, { "epoch": 4.5, "learning_rate": 6.213684210526316e-06, "loss": 0.0115, "step": 4100 }, { "epoch": 4.61, "learning_rate": 6.108421052631579e-06, "loss": 0.0105, "step": 4200 }, { "epoch": 4.72, "learning_rate": 6.003157894736843e-06, "loss": 0.0104, "step": 4300 }, { "epoch": 4.83, "learning_rate": 5.897894736842106e-06, "loss": 0.0123, "step": 4400 }, { "epoch": 4.94, "learning_rate": 5.7926315789473685e-06, "loss": 0.0116, "step": 4500 }, { "epoch": 5.05, "learning_rate": 5.687368421052633e-06, "loss": 0.0079, "step": 4600 }, { "epoch": 5.16, "learning_rate": 5.582105263157895e-06, "loss": 0.0053, "step": 4700 }, { "epoch": 5.27, "learning_rate": 5.476842105263158e-06, "loss": 0.0052, "step": 4800 }, { "epoch": 5.38, "learning_rate": 5.371578947368421e-06, "loss": 0.0053, "step": 4900 }, { "epoch": 5.49, "learning_rate": 5.266315789473685e-06, "loss": 0.0054, "step": 5000 }, { "epoch": 5.49, "eval_cer": 5.404055670184384, "eval_loss": 0.23248465359210968, "eval_runtime": 4237.1053, "eval_samples_per_second": 1.087, "eval_steps_per_second": 0.068, "eval_wer": 8.64695945945946, "step": 5000 }, { "epoch": 5.6, "learning_rate": 5.161052631578948e-06, "loss": 0.0053, "step": 5100 }, { "epoch": 5.71, "learning_rate": 5.0557894736842105e-06, "loss": 0.0056, "step": 5200 }, { "epoch": 5.82, "learning_rate": 4.950526315789474e-06, "loss": 0.005, "step": 5300 }, { "epoch": 5.93, "learning_rate": 4.845263157894737e-06, "loss": 0.0054, "step": 5400 }, { "epoch": 6.04, "learning_rate": 4.74e-06, "loss": 0.0045, "step": 5500 }, { "epoch": 6.15, "learning_rate": 4.634736842105264e-06, "loss": 0.0029, "step": 5600 }, { "epoch": 6.26, "learning_rate": 4.529473684210527e-06, "loss": 0.0024, "step": 5700 }, { "epoch": 6.37, "learning_rate": 4.424210526315789e-06, "loss": 0.0025, "step": 5800 }, { "epoch": 6.48, "learning_rate": 4.3189473684210535e-06, "loss": 0.0026, "step": 5900 }, { "epoch": 6.59, "learning_rate": 4.213684210526316e-06, "loss": 0.0031, "step": 6000 }, { "epoch": 6.59, "eval_cer": 5.440853714631165, "eval_loss": 0.24615277349948883, "eval_runtime": 4218.0606, "eval_samples_per_second": 1.091, "eval_steps_per_second": 0.068, "eval_wer": 8.70777027027027, "step": 6000 }, { "epoch": 6.7, "learning_rate": 4.108421052631579e-06, "loss": 0.0028, "step": 6100 }, { "epoch": 6.81, "learning_rate": 4.0031578947368424e-06, "loss": 0.0026, "step": 6200 }, { "epoch": 6.92, "learning_rate": 3.897894736842106e-06, "loss": 0.0027, "step": 6300 }, { "epoch": 7.03, "learning_rate": 3.792631578947369e-06, "loss": 0.0021, "step": 6400 }, { "epoch": 7.14, "learning_rate": 3.687368421052632e-06, "loss": 0.0014, "step": 6500 }, { "epoch": 7.24, "learning_rate": 3.582105263157895e-06, "loss": 0.0015, "step": 6600 }, { "epoch": 7.35, "learning_rate": 3.476842105263158e-06, "loss": 0.001, "step": 6700 }, { "epoch": 7.46, "learning_rate": 3.3715789473684212e-06, "loss": 0.0015, "step": 6800 }, { "epoch": 7.57, "learning_rate": 3.2663157894736845e-06, "loss": 0.0013, "step": 6900 }, { "epoch": 7.68, "learning_rate": 3.1610526315789474e-06, "loss": 0.0014, "step": 7000 }, { "epoch": 7.68, "eval_cer": 5.484879946379992, "eval_loss": 0.26076802611351013, "eval_runtime": 4205.2214, "eval_samples_per_second": 1.095, "eval_steps_per_second": 0.068, "eval_wer": 8.714527027027028, "step": 7000 }, { "epoch": 7.79, "learning_rate": 3.055789473684211e-06, "loss": 0.0013, "step": 7100 }, { "epoch": 7.9, "learning_rate": 2.9505263157894735e-06, "loss": 0.0013, "step": 7200 }, { "epoch": 8.01, "learning_rate": 2.845263157894737e-06, "loss": 0.0018, "step": 7300 }, { "epoch": 8.12, "learning_rate": 2.7400000000000004e-06, "loss": 0.0011, "step": 7400 }, { "epoch": 8.23, "learning_rate": 2.6347368421052633e-06, "loss": 0.0009, "step": 7500 }, { "epoch": 8.34, "learning_rate": 2.5294736842105266e-06, "loss": 0.0008, "step": 7600 }, { "epoch": 8.45, "learning_rate": 2.42421052631579e-06, "loss": 0.0008, "step": 7700 }, { "epoch": 8.56, "learning_rate": 2.3189473684210527e-06, "loss": 0.0008, "step": 7800 }, { "epoch": 8.67, "learning_rate": 2.213684210526316e-06, "loss": 0.0008, "step": 7900 }, { "epoch": 8.78, "learning_rate": 2.1084210526315792e-06, "loss": 0.0009, "step": 8000 }, { "epoch": 8.78, "eval_cer": 5.387627971770643, "eval_loss": 0.269546777009964, "eval_runtime": 4205.0269, "eval_samples_per_second": 1.095, "eval_steps_per_second": 0.068, "eval_wer": 8.630067567567568, "step": 8000 }, { "epoch": 8.89, "learning_rate": 2.003157894736842e-06, "loss": 0.0007, "step": 8100 }, { "epoch": 9.0, "learning_rate": 1.8978947368421056e-06, "loss": 0.0009, "step": 8200 }, { "epoch": 9.11, "learning_rate": 1.7926315789473686e-06, "loss": 0.0007, "step": 8300 }, { "epoch": 9.22, "learning_rate": 1.6873684210526317e-06, "loss": 0.0006, "step": 8400 }, { "epoch": 9.33, "learning_rate": 1.5821052631578948e-06, "loss": 0.0005, "step": 8500 }, { "epoch": 9.44, "learning_rate": 1.4768421052631578e-06, "loss": 0.0005, "step": 8600 }, { "epoch": 9.55, "learning_rate": 1.3715789473684213e-06, "loss": 0.0006, "step": 8700 }, { "epoch": 9.66, "learning_rate": 1.2663157894736844e-06, "loss": 0.0005, "step": 8800 }, { "epoch": 9.77, "learning_rate": 1.1610526315789474e-06, "loss": 0.0005, "step": 8900 }, { "epoch": 9.88, "learning_rate": 1.0557894736842105e-06, "loss": 0.0004, "step": 9000 }, { "epoch": 9.88, "eval_cer": 5.352801251133512, "eval_loss": 0.2793508470058441, "eval_runtime": 4203.0281, "eval_samples_per_second": 1.095, "eval_steps_per_second": 0.069, "eval_wer": 8.60641891891892, "step": 9000 }, { "epoch": 9.99, "learning_rate": 9.505263157894738e-07, "loss": 0.0005, "step": 9100 }, { "epoch": 10.1, "learning_rate": 8.452631578947369e-07, "loss": 0.0005, "step": 9200 }, { "epoch": 10.21, "learning_rate": 7.4e-07, "loss": 0.0003, "step": 9300 }, { "epoch": 10.32, "learning_rate": 6.347368421052633e-07, "loss": 0.0004, "step": 9400 }, { "epoch": 10.43, "learning_rate": 5.294736842105263e-07, "loss": 0.0003, "step": 9500 }, { "epoch": 10.54, "learning_rate": 4.242105263157895e-07, "loss": 0.0004, "step": 9600 }, { "epoch": 10.65, "learning_rate": 3.1894736842105263e-07, "loss": 0.0003, "step": 9700 }, { "epoch": 10.76, "learning_rate": 2.136842105263158e-07, "loss": 0.0003, "step": 9800 }, { "epoch": 10.87, "learning_rate": 1.0842105263157895e-07, "loss": 0.0004, "step": 9900 }, { "epoch": 10.98, "learning_rate": 3.1578947368421054e-09, "loss": 0.0003, "step": 10000 }, { "epoch": 10.98, "eval_cer": 5.469766463839351, "eval_loss": 0.2836119830608368, "eval_runtime": 4263.2576, "eval_samples_per_second": 1.08, "eval_steps_per_second": 0.068, "eval_wer": 8.721283783783784, "step": 10000 }, { "epoch": 10.98, "step": 10000, "total_flos": 3.265323341119488e+20, "train_loss": 0.043234722255170346, "train_runtime": 85032.8136, "train_samples_per_second": 3.763, "train_steps_per_second": 0.118 } ], "max_steps": 10000, "num_train_epochs": 11, "total_flos": 3.265323341119488e+20, "trial_name": null, "trial_params": null }