|
{
  "best_metric": 8.60641891891892,
  "best_model_checkpoint": "./checkpoint-9000",
  "epoch": 10.97694840834248,
  "global_step": 10000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11,
      "learning_rate": 1.94e-06,
      "loss": 0.8124,
      "step": 100
    },
    {
      "epoch": 0.22,
      "learning_rate": 3.94e-06,
      "loss": 0.3235,
      "step": 200
    },
    {
      "epoch": 0.33,
      "learning_rate": 5.94e-06,
      "loss": 0.2396,
      "step": 300
    },
    {
      "epoch": 0.44,
      "learning_rate": 7.94e-06,
      "loss": 0.2138,
      "step": 400
    },
    {
      "epoch": 0.55,
      "learning_rate": 9.940000000000001e-06,
      "loss": 0.1972,
      "step": 500
    },
    {
      "epoch": 0.66,
      "learning_rate": 9.897894736842107e-06,
      "loss": 0.1885,
      "step": 600
    },
    {
      "epoch": 0.77,
      "learning_rate": 9.79263157894737e-06,
      "loss": 0.1792,
      "step": 700
    },
    {
      "epoch": 0.88,
      "learning_rate": 9.687368421052632e-06,
      "loss": 0.174,
      "step": 800
    },
    {
      "epoch": 0.99,
      "learning_rate": 9.582105263157897e-06,
      "loss": 0.1669,
      "step": 900
    },
    {
      "epoch": 1.1,
      "learning_rate": 9.476842105263158e-06,
      "loss": 0.1106,
      "step": 1000
    },
    {
      "epoch": 1.1,
      "eval_cer": 6.4784271464430745,
      "eval_loss": 0.18272116780281067,
      "eval_runtime": 4332.708,
      "eval_samples_per_second": 1.063,
      "eval_steps_per_second": 0.066,
      "eval_wer": 10.347972972972972,
      "step": 1000
    },
    {
      "epoch": 1.21,
      "learning_rate": 9.371578947368421e-06,
      "loss": 0.1083,
      "step": 1100
    },
    {
      "epoch": 1.32,
      "learning_rate": 9.266315789473685e-06,
      "loss": 0.1043,
      "step": 1200
    },
    {
      "epoch": 1.43,
      "learning_rate": 9.161052631578948e-06,
      "loss": 0.1016,
      "step": 1300
    },
    {
      "epoch": 1.54,
      "learning_rate": 9.055789473684211e-06,
      "loss": 0.1023,
      "step": 1400
    },
    {
      "epoch": 1.65,
      "learning_rate": 8.950526315789474e-06,
      "loss": 0.101,
      "step": 1500
    },
    {
      "epoch": 1.76,
      "learning_rate": 8.845263157894738e-06,
      "loss": 0.099,
      "step": 1600
    },
    {
      "epoch": 1.87,
      "learning_rate": 8.740000000000001e-06,
      "loss": 0.1005,
      "step": 1700
    },
    {
      "epoch": 1.98,
      "learning_rate": 8.634736842105264e-06,
      "loss": 0.1002,
      "step": 1800
    },
    {
      "epoch": 2.09,
      "learning_rate": 8.529473684210528e-06,
      "loss": 0.0601,
      "step": 1900
    },
    {
      "epoch": 2.2,
      "learning_rate": 8.42421052631579e-06,
      "loss": 0.0487,
      "step": 2000
    },
    {
      "epoch": 2.2,
      "eval_cer": 5.912657213073819,
      "eval_loss": 0.17993855476379395,
      "eval_runtime": 4312.9082,
      "eval_samples_per_second": 1.067,
      "eval_steps_per_second": 0.067,
      "eval_wer": 9.47635135135135,
      "step": 2000
    },
    {
      "epoch": 2.31,
      "learning_rate": 8.318947368421052e-06,
      "loss": 0.0508,
      "step": 2100
    },
    {
      "epoch": 2.41,
      "learning_rate": 8.213684210526316e-06,
      "loss": 0.0501,
      "step": 2200
    },
    {
      "epoch": 2.52,
      "learning_rate": 8.10842105263158e-06,
      "loss": 0.0506,
      "step": 2300
    },
    {
      "epoch": 2.63,
      "learning_rate": 8.003157894736842e-06,
      "loss": 0.0498,
      "step": 2400
    },
    {
      "epoch": 2.74,
      "learning_rate": 7.897894736842106e-06,
      "loss": 0.0486,
      "step": 2500
    },
    {
      "epoch": 2.85,
      "learning_rate": 7.792631578947369e-06,
      "loss": 0.0513,
      "step": 2600
    },
    {
      "epoch": 2.96,
      "learning_rate": 7.687368421052632e-06,
      "loss": 0.0504,
      "step": 2700
    },
    {
      "epoch": 3.07,
      "learning_rate": 7.582105263157895e-06,
      "loss": 0.0307,
      "step": 2800
    },
    {
      "epoch": 3.18,
      "learning_rate": 7.476842105263159e-06,
      "loss": 0.0239,
      "step": 2900
    },
    {
      "epoch": 3.29,
      "learning_rate": 7.371578947368422e-06,
      "loss": 0.0243,
      "step": 3000
    },
    {
      "epoch": 3.29,
      "eval_cer": 5.806862835289325,
      "eval_loss": 0.19502821564674377,
      "eval_runtime": 4299.7561,
      "eval_samples_per_second": 1.071,
      "eval_steps_per_second": 0.067,
      "eval_wer": 9.21114864864865,
      "step": 3000
    },
    {
      "epoch": 3.4,
      "learning_rate": 7.266315789473684e-06,
      "loss": 0.024,
      "step": 3100
    },
    {
      "epoch": 3.51,
      "learning_rate": 7.161052631578948e-06,
      "loss": 0.0252,
      "step": 3200
    },
    {
      "epoch": 3.62,
      "learning_rate": 7.055789473684212e-06,
      "loss": 0.0244,
      "step": 3300
    },
    {
      "epoch": 3.73,
      "learning_rate": 6.950526315789474e-06,
      "loss": 0.0249,
      "step": 3400
    },
    {
      "epoch": 3.84,
      "learning_rate": 6.845263157894737e-06,
      "loss": 0.0247,
      "step": 3500
    },
    {
      "epoch": 3.95,
      "learning_rate": 6.740000000000001e-06,
      "loss": 0.0245,
      "step": 3600
    },
    {
      "epoch": 4.06,
      "learning_rate": 6.634736842105264e-06,
      "loss": 0.0173,
      "step": 3700
    },
    {
      "epoch": 4.17,
      "learning_rate": 6.529473684210526e-06,
      "loss": 0.0106,
      "step": 3800
    },
    {
      "epoch": 4.28,
      "learning_rate": 6.42421052631579e-06,
      "loss": 0.0116,
      "step": 3900
    },
    {
      "epoch": 4.39,
      "learning_rate": 6.318947368421054e-06,
      "loss": 0.0106,
      "step": 4000
    },
    {
      "epoch": 4.39,
      "eval_cer": 5.575560841623846,
      "eval_loss": 0.21126572787761688,
      "eval_runtime": 4218.4134,
      "eval_samples_per_second": 1.091,
      "eval_steps_per_second": 0.068,
      "eval_wer": 8.971283783783784,
      "step": 4000
    },
    {
      "epoch": 4.5,
      "learning_rate": 6.213684210526316e-06,
      "loss": 0.0115,
      "step": 4100
    },
    {
      "epoch": 4.61,
      "learning_rate": 6.108421052631579e-06,
      "loss": 0.0105,
      "step": 4200
    },
    {
      "epoch": 4.72,
      "learning_rate": 6.003157894736843e-06,
      "loss": 0.0104,
      "step": 4300
    },
    {
      "epoch": 4.83,
      "learning_rate": 5.897894736842106e-06,
      "loss": 0.0123,
      "step": 4400
    },
    {
      "epoch": 4.94,
      "learning_rate": 5.7926315789473685e-06,
      "loss": 0.0116,
      "step": 4500
    },
    {
      "epoch": 5.05,
      "learning_rate": 5.687368421052633e-06,
      "loss": 0.0079,
      "step": 4600
    },
    {
      "epoch": 5.16,
      "learning_rate": 5.582105263157895e-06,
      "loss": 0.0053,
      "step": 4700
    },
    {
      "epoch": 5.27,
      "learning_rate": 5.476842105263158e-06,
      "loss": 0.0052,
      "step": 4800
    },
    {
      "epoch": 5.38,
      "learning_rate": 5.371578947368421e-06,
      "loss": 0.0053,
      "step": 4900
    },
    {
      "epoch": 5.49,
      "learning_rate": 5.266315789473685e-06,
      "loss": 0.0054,
      "step": 5000
    },
    {
      "epoch": 5.49,
      "eval_cer": 5.404055670184384,
      "eval_loss": 0.23248465359210968,
      "eval_runtime": 4237.1053,
      "eval_samples_per_second": 1.087,
      "eval_steps_per_second": 0.068,
      "eval_wer": 8.64695945945946,
      "step": 5000
    },
    {
      "epoch": 5.6,
      "learning_rate": 5.161052631578948e-06,
      "loss": 0.0053,
      "step": 5100
    },
    {
      "epoch": 5.71,
      "learning_rate": 5.0557894736842105e-06,
      "loss": 0.0056,
      "step": 5200
    },
    {
      "epoch": 5.82,
      "learning_rate": 4.950526315789474e-06,
      "loss": 0.005,
      "step": 5300
    },
    {
      "epoch": 5.93,
      "learning_rate": 4.845263157894737e-06,
      "loss": 0.0054,
      "step": 5400
    },
    {
      "epoch": 6.04,
      "learning_rate": 4.74e-06,
      "loss": 0.0045,
      "step": 5500
    },
    {
      "epoch": 6.15,
      "learning_rate": 4.634736842105264e-06,
      "loss": 0.0029,
      "step": 5600
    },
    {
      "epoch": 6.26,
      "learning_rate": 4.529473684210527e-06,
      "loss": 0.0024,
      "step": 5700
    },
    {
      "epoch": 6.37,
      "learning_rate": 4.424210526315789e-06,
      "loss": 0.0025,
      "step": 5800
    },
    {
      "epoch": 6.48,
      "learning_rate": 4.3189473684210535e-06,
      "loss": 0.0026,
      "step": 5900
    },
    {
      "epoch": 6.59,
      "learning_rate": 4.213684210526316e-06,
      "loss": 0.0031,
      "step": 6000
    },
    {
      "epoch": 6.59,
      "eval_cer": 5.440853714631165,
      "eval_loss": 0.24615277349948883,
      "eval_runtime": 4218.0606,
      "eval_samples_per_second": 1.091,
      "eval_steps_per_second": 0.068,
      "eval_wer": 8.70777027027027,
      "step": 6000
    },
    {
      "epoch": 6.7,
      "learning_rate": 4.108421052631579e-06,
      "loss": 0.0028,
      "step": 6100
    },
    {
      "epoch": 6.81,
      "learning_rate": 4.0031578947368424e-06,
      "loss": 0.0026,
      "step": 6200
    },
    {
      "epoch": 6.92,
      "learning_rate": 3.897894736842106e-06,
      "loss": 0.0027,
      "step": 6300
    },
    {
      "epoch": 7.03,
      "learning_rate": 3.792631578947369e-06,
      "loss": 0.0021,
      "step": 6400
    },
    {
      "epoch": 7.14,
      "learning_rate": 3.687368421052632e-06,
      "loss": 0.0014,
      "step": 6500
    },
    {
      "epoch": 7.24,
      "learning_rate": 3.582105263157895e-06,
      "loss": 0.0015,
      "step": 6600
    },
    {
      "epoch": 7.35,
      "learning_rate": 3.476842105263158e-06,
      "loss": 0.001,
      "step": 6700
    },
    {
      "epoch": 7.46,
      "learning_rate": 3.3715789473684212e-06,
      "loss": 0.0015,
      "step": 6800
    },
    {
      "epoch": 7.57,
      "learning_rate": 3.2663157894736845e-06,
      "loss": 0.0013,
      "step": 6900
    },
    {
      "epoch": 7.68,
      "learning_rate": 3.1610526315789474e-06,
      "loss": 0.0014,
      "step": 7000
    },
    {
      "epoch": 7.68,
      "eval_cer": 5.484879946379992,
      "eval_loss": 0.26076802611351013,
      "eval_runtime": 4205.2214,
      "eval_samples_per_second": 1.095,
      "eval_steps_per_second": 0.068,
      "eval_wer": 8.714527027027028,
      "step": 7000
    },
    {
      "epoch": 7.79,
      "learning_rate": 3.055789473684211e-06,
      "loss": 0.0013,
      "step": 7100
    },
    {
      "epoch": 7.9,
      "learning_rate": 2.9505263157894735e-06,
      "loss": 0.0013,
      "step": 7200
    },
    {
      "epoch": 8.01,
      "learning_rate": 2.845263157894737e-06,
      "loss": 0.0018,
      "step": 7300
    },
    {
      "epoch": 8.12,
      "learning_rate": 2.7400000000000004e-06,
      "loss": 0.0011,
      "step": 7400
    },
    {
      "epoch": 8.23,
      "learning_rate": 2.6347368421052633e-06,
      "loss": 0.0009,
      "step": 7500
    },
    {
      "epoch": 8.34,
      "learning_rate": 2.5294736842105266e-06,
      "loss": 0.0008,
      "step": 7600
    },
    {
      "epoch": 8.45,
      "learning_rate": 2.42421052631579e-06,
      "loss": 0.0008,
      "step": 7700
    },
    {
      "epoch": 8.56,
      "learning_rate": 2.3189473684210527e-06,
      "loss": 0.0008,
      "step": 7800
    },
    {
      "epoch": 8.67,
      "learning_rate": 2.213684210526316e-06,
      "loss": 0.0008,
      "step": 7900
    },
    {
      "epoch": 8.78,
      "learning_rate": 2.1084210526315792e-06,
      "loss": 0.0009,
      "step": 8000
    },
    {
      "epoch": 8.78,
      "eval_cer": 5.387627971770643,
      "eval_loss": 0.269546777009964,
      "eval_runtime": 4205.0269,
      "eval_samples_per_second": 1.095,
      "eval_steps_per_second": 0.068,
      "eval_wer": 8.630067567567568,
      "step": 8000
    },
    {
      "epoch": 8.89,
      "learning_rate": 2.003157894736842e-06,
      "loss": 0.0007,
      "step": 8100
    },
    {
      "epoch": 9.0,
      "learning_rate": 1.8978947368421056e-06,
      "loss": 0.0009,
      "step": 8200
    },
    {
      "epoch": 9.11,
      "learning_rate": 1.7926315789473686e-06,
      "loss": 0.0007,
      "step": 8300
    },
    {
      "epoch": 9.22,
      "learning_rate": 1.6873684210526317e-06,
      "loss": 0.0006,
      "step": 8400
    },
    {
      "epoch": 9.33,
      "learning_rate": 1.5821052631578948e-06,
      "loss": 0.0005,
      "step": 8500
    },
    {
      "epoch": 9.44,
      "learning_rate": 1.4768421052631578e-06,
      "loss": 0.0005,
      "step": 8600
    },
    {
      "epoch": 9.55,
      "learning_rate": 1.3715789473684213e-06,
      "loss": 0.0006,
      "step": 8700
    },
    {
      "epoch": 9.66,
      "learning_rate": 1.2663157894736844e-06,
      "loss": 0.0005,
      "step": 8800
    },
    {
      "epoch": 9.77,
      "learning_rate": 1.1610526315789474e-06,
      "loss": 0.0005,
      "step": 8900
    },
    {
      "epoch": 9.88,
      "learning_rate": 1.0557894736842105e-06,
      "loss": 0.0004,
      "step": 9000
    },
    {
      "epoch": 9.88,
      "eval_cer": 5.352801251133512,
      "eval_loss": 0.2793508470058441,
      "eval_runtime": 4203.0281,
      "eval_samples_per_second": 1.095,
      "eval_steps_per_second": 0.069,
      "eval_wer": 8.60641891891892,
      "step": 9000
    },
    {
      "epoch": 9.99,
      "learning_rate": 9.505263157894738e-07,
      "loss": 0.0005,
      "step": 9100
    },
    {
      "epoch": 10.1,
      "learning_rate": 8.452631578947369e-07,
      "loss": 0.0005,
      "step": 9200
    },
    {
      "epoch": 10.21,
      "learning_rate": 7.4e-07,
      "loss": 0.0003,
      "step": 9300
    },
    {
      "epoch": 10.32,
      "learning_rate": 6.347368421052633e-07,
      "loss": 0.0004,
      "step": 9400
    },
    {
      "epoch": 10.43,
      "learning_rate": 5.294736842105263e-07,
      "loss": 0.0003,
      "step": 9500
    },
    {
      "epoch": 10.54,
      "learning_rate": 4.242105263157895e-07,
      "loss": 0.0004,
      "step": 9600
    },
    {
      "epoch": 10.65,
      "learning_rate": 3.1894736842105263e-07,
      "loss": 0.0003,
      "step": 9700
    },
    {
      "epoch": 10.76,
      "learning_rate": 2.136842105263158e-07,
      "loss": 0.0003,
      "step": 9800
    },
    {
      "epoch": 10.87,
      "learning_rate": 1.0842105263157895e-07,
      "loss": 0.0004,
      "step": 9900
    },
    {
      "epoch": 10.98,
      "learning_rate": 3.1578947368421054e-09,
      "loss": 0.0003,
      "step": 10000
    },
    {
      "epoch": 10.98,
      "eval_cer": 5.469766463839351,
      "eval_loss": 0.2836119830608368,
      "eval_runtime": 4263.2576,
      "eval_samples_per_second": 1.08,
      "eval_steps_per_second": 0.068,
      "eval_wer": 8.721283783783784,
      "step": 10000
    },
    {
      "epoch": 10.98,
      "step": 10000,
      "total_flos": 3.265323341119488e+20,
      "train_loss": 0.043234722255170346,
      "train_runtime": 85032.8136,
      "train_samples_per_second": 3.763,
      "train_steps_per_second": 0.118
    }
  ],
  "max_steps": 10000,
  "num_train_epochs": 11,
  "total_flos": 3.265323341119488e+20,
  "trial_name": null,
  "trial_params": null
}
|
|