{ "best_metric": null, "best_model_checkpoint": null, "epoch": 39.99753086419753, "global_step": 8080, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.47, "learning_rate": 4.96e-05, "loss": 6.1701, "step": 500 }, { "epoch": 2.47, "eval_loss": 2.8719067573547363, "eval_runtime": 34.3427, "eval_samples_per_second": 11.793, "eval_wer": 1.0, "step": 500 }, { "epoch": 4.95, "learning_rate": 9.960000000000001e-05, "loss": 1.9379, "step": 1000 }, { "epoch": 4.95, "eval_loss": 0.8804072737693787, "eval_runtime": 34.3595, "eval_samples_per_second": 11.787, "eval_wer": 0.6778097982708934, "step": 1000 }, { "epoch": 7.42, "learning_rate": 9.299435028248588e-05, "loss": 1.0455, "step": 1500 }, { "epoch": 7.42, "eval_loss": 0.6408620476722717, "eval_runtime": 34.2855, "eval_samples_per_second": 11.813, "eval_wer": 0.5227665706051873, "step": 1500 }, { "epoch": 9.9, "learning_rate": 8.593220338983051e-05, "loss": 0.7456, "step": 2000 }, { "epoch": 9.9, "eval_loss": 0.6179625391960144, "eval_runtime": 34.3686, "eval_samples_per_second": 11.784, "eval_wer": 0.4755043227665706, "step": 2000 }, { "epoch": 12.38, "learning_rate": 7.887005649717515e-05, "loss": 0.5543, "step": 2500 }, { "epoch": 12.38, "eval_loss": 0.4895203411579132, "eval_runtime": 34.1759, "eval_samples_per_second": 11.85, "eval_wer": 0.4340057636887608, "step": 2500 }, { "epoch": 14.85, "learning_rate": 7.180790960451977e-05, "loss": 0.4526, "step": 3000 }, { "epoch": 14.85, "eval_loss": 0.4277503192424774, "eval_runtime": 34.18, "eval_samples_per_second": 11.849, "eval_wer": 0.42536023054755046, "step": 3000 }, { "epoch": 17.33, "learning_rate": 6.474576271186441e-05, "loss": 0.3827, "step": 3500 }, { "epoch": 17.33, "eval_loss": 0.3866088092327118, "eval_runtime": 34.3584, "eval_samples_per_second": 11.788, "eval_wer": 0.414985590778098, "step": 3500 }, { "epoch": 19.8, "learning_rate": 5.7683615819209045e-05, "loss": 0.3226, "step": 4000 }, { "epoch": 19.8, "eval_loss": 0.38195741176605225, "eval_runtime": 34.2414, "eval_samples_per_second": 11.828, "eval_wer": 0.392507204610951, "step": 4000 }, { "epoch": 22.28, "learning_rate": 5.0635593220338987e-05, "loss": 0.295, "step": 4500 }, { "epoch": 22.28, "eval_loss": 0.41771677136421204, "eval_runtime": 34.2045, "eval_samples_per_second": 11.841, "eval_wer": 0.3919308357348703, "step": 4500 }, { "epoch": 24.75, "learning_rate": 4.357344632768362e-05, "loss": 0.2613, "step": 5000 }, { "epoch": 24.75, "eval_loss": 0.41437995433807373, "eval_runtime": 34.2585, "eval_samples_per_second": 11.822, "eval_wer": 0.3855907780979827, "step": 5000 }, { "epoch": 27.23, "learning_rate": 3.651129943502825e-05, "loss": 0.228, "step": 5500 }, { "epoch": 27.23, "eval_loss": 0.4071010947227478, "eval_runtime": 34.2741, "eval_samples_per_second": 11.817, "eval_wer": 0.38962536023054756, "step": 5500 }, { "epoch": 29.7, "learning_rate": 2.9449152542372883e-05, "loss": 0.2081, "step": 6000 }, { "epoch": 29.7, "eval_loss": 0.3746974468231201, "eval_runtime": 34.1866, "eval_samples_per_second": 11.847, "eval_wer": 0.38213256484149855, "step": 6000 }, { "epoch": 32.18, "learning_rate": 2.2387005649717515e-05, "loss": 0.1906, "step": 6500 }, { "epoch": 32.18, "eval_loss": 0.396076500415802, "eval_runtime": 34.2512, "eval_samples_per_second": 11.824, "eval_wer": 0.37175792507204614, "step": 6500 }, { "epoch": 34.65, "learning_rate": 1.535310734463277e-05, "loss": 0.1752, "step": 7000 }, { "epoch": 34.65, "eval_loss": 0.37449249625205994, "eval_runtime": 34.2395, "eval_samples_per_second": 11.828, "eval_wer": 0.3729106628242075, "step": 7000 }, { "epoch": 37.13, "learning_rate": 8.290960451977402e-06, "loss": 0.176, "step": 7500 }, { "epoch": 37.13, "eval_loss": 0.3883863687515259, "eval_runtime": 34.2865, "eval_samples_per_second": 11.812, "eval_wer": 0.37118155619596543, "step": 7500 }, { "epoch": 39.6, "learning_rate": 1.228813559322034e-06, "loss": 0.1531, "step": 8000 }, { "epoch": 39.6, "eval_loss": 0.3848457336425781, "eval_runtime": 34.1853, "eval_samples_per_second": 11.847, "eval_wer": 0.368299711815562, "step": 8000 }, { "epoch": 40.0, "step": 8080, "total_flos": 3.820109864655936e+16, "train_runtime": 7468.243, "train_samples_per_second": 1.082 } ], "max_steps": 8080, "num_train_epochs": 40, "total_flos": 3.820109864655936e+16, "trial_name": null, "trial_params": null }