{ "best_metric": 0.1275225579738617, "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-nyagen-male-model/checkpoint-1800", "epoch": 5.172413793103448, "eval_steps": 100, "global_step": 2100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.24630541871921183, "grad_norm": 1.8344100713729858, "learning_rate": 0.00028799999999999995, "loss": 6.345, "step": 100 }, { "epoch": 0.24630541871921183, "eval_loss": 0.45057007670402527, "eval_runtime": 16.3753, "eval_samples_per_second": 11.236, "eval_steps_per_second": 2.809, "eval_wer": 0.4025974025974026, "step": 100 }, { "epoch": 0.49261083743842365, "grad_norm": 1.3397903442382812, "learning_rate": 0.0002976158940397351, "loss": 0.353, "step": 200 }, { "epoch": 0.49261083743842365, "eval_loss": 0.2066199630498886, "eval_runtime": 16.5098, "eval_samples_per_second": 11.145, "eval_steps_per_second": 2.786, "eval_wer": 0.2924045651318379, "step": 200 }, { "epoch": 0.7389162561576355, "grad_norm": 68.3929672241211, "learning_rate": 0.00029513245033112583, "loss": 0.2989, "step": 300 }, { "epoch": 0.7389162561576355, "eval_loss": 0.18291255831718445, "eval_runtime": 16.4355, "eval_samples_per_second": 11.195, "eval_steps_per_second": 2.799, "eval_wer": 0.2715466351829988, "step": 300 }, { "epoch": 0.9852216748768473, "grad_norm": 1.0996179580688477, "learning_rate": 0.0002926490066225165, "loss": 0.2402, "step": 400 }, { "epoch": 0.9852216748768473, "eval_loss": 0.16742299497127533, "eval_runtime": 16.3654, "eval_samples_per_second": 11.243, "eval_steps_per_second": 2.811, "eval_wer": 0.2546241637150728, "step": 400 }, { "epoch": 1.2315270935960592, "grad_norm": 0.8030900359153748, "learning_rate": 0.00029016556291390727, "loss": 0.2302, "step": 500 }, { "epoch": 1.2315270935960592, "eval_loss": 0.15821850299835205, "eval_runtime": 16.4101, "eval_samples_per_second": 11.213, "eval_steps_per_second": 2.803, "eval_wer": 0.24360487996851632, "step": 500 }, { "epoch": 1.477832512315271, "grad_norm": 0.7671955227851868, "learning_rate": 0.00028768211920529796, "loss": 0.2279, "step": 600 }, { "epoch": 1.477832512315271, "eval_loss": 0.16019868850708008, "eval_runtime": 16.6478, "eval_samples_per_second": 11.053, "eval_steps_per_second": 2.763, "eval_wer": 0.24281778827233372, "step": 600 }, { "epoch": 1.7241379310344827, "grad_norm": 0.7692348957061768, "learning_rate": 0.0002851986754966887, "loss": 0.2264, "step": 700 }, { "epoch": 1.7241379310344827, "eval_loss": 0.15070533752441406, "eval_runtime": 16.3912, "eval_samples_per_second": 11.226, "eval_steps_per_second": 2.806, "eval_wer": 0.22865013774104684, "step": 700 }, { "epoch": 1.9704433497536946, "grad_norm": 0.5601118206977844, "learning_rate": 0.00028271523178807946, "loss": 0.2276, "step": 800 }, { "epoch": 1.9704433497536946, "eval_loss": 0.14960433542728424, "eval_runtime": 16.3656, "eval_samples_per_second": 11.243, "eval_steps_per_second": 2.811, "eval_wer": 0.23534041715859896, "step": 800 }, { "epoch": 2.2167487684729066, "grad_norm": 0.7260679006576538, "learning_rate": 0.00028023178807947015, "loss": 0.2088, "step": 900 }, { "epoch": 2.2167487684729066, "eval_loss": 0.1460462063550949, "eval_runtime": 16.4242, "eval_samples_per_second": 11.203, "eval_steps_per_second": 2.801, "eval_wer": 0.22077922077922077, "step": 900 }, { "epoch": 2.4630541871921183, "grad_norm": 0.4174855649471283, "learning_rate": 0.0002777483443708609, "loss": 0.1881, "step": 1000 }, { "epoch": 2.4630541871921183, "eval_loss": 0.14552360773086548, "eval_runtime": 16.4841, "eval_samples_per_second": 11.162, "eval_steps_per_second": 2.791, "eval_wer": 0.21645021645021645, "step": 1000 }, { "epoch": 2.70935960591133, "grad_norm": 2.906888484954834, "learning_rate": 0.00027526490066225165, "loss": 0.2079, "step": 1100 }, { "epoch": 2.70935960591133, "eval_loss": 0.14178700745105743, "eval_runtime": 16.3484, "eval_samples_per_second": 11.255, "eval_steps_per_second": 2.814, "eval_wer": 0.21684376229830776, "step": 1100 }, { "epoch": 2.955665024630542, "grad_norm": 8.858180046081543, "learning_rate": 0.0002727814569536424, "loss": 0.196, "step": 1200 }, { "epoch": 2.955665024630542, "eval_loss": 0.140374094247818, "eval_runtime": 16.4414, "eval_samples_per_second": 11.191, "eval_steps_per_second": 2.798, "eval_wer": 0.2085792994883904, "step": 1200 }, { "epoch": 3.2019704433497536, "grad_norm": 0.8277586102485657, "learning_rate": 0.0002702980132450331, "loss": 0.1782, "step": 1300 }, { "epoch": 3.2019704433497536, "eval_loss": 0.13734222948551178, "eval_runtime": 16.5713, "eval_samples_per_second": 11.104, "eval_steps_per_second": 2.776, "eval_wer": 0.2077922077922078, "step": 1300 }, { "epoch": 3.4482758620689653, "grad_norm": 0.6015973091125488, "learning_rate": 0.0002678145695364238, "loss": 0.1741, "step": 1400 }, { "epoch": 3.4482758620689653, "eval_loss": 0.13425125181674957, "eval_runtime": 16.5286, "eval_samples_per_second": 11.132, "eval_steps_per_second": 2.783, "eval_wer": 0.1944116489571035, "step": 1400 }, { "epoch": 3.6945812807881775, "grad_norm": 0.8872548341751099, "learning_rate": 0.00026533112582781453, "loss": 0.1948, "step": 1500 }, { "epoch": 3.6945812807881775, "eval_loss": 0.1318366825580597, "eval_runtime": 16.3284, "eval_samples_per_second": 11.269, "eval_steps_per_second": 2.817, "eval_wer": 0.21369539551357733, "step": 1500 }, { "epoch": 3.9408866995073892, "grad_norm": 1.193844199180603, "learning_rate": 0.0002628476821192053, "loss": 0.1904, "step": 1600 }, { "epoch": 3.9408866995073892, "eval_loss": 0.13069555163383484, "eval_runtime": 16.3618, "eval_samples_per_second": 11.246, "eval_steps_per_second": 2.811, "eval_wer": 0.20425029515938606, "step": 1600 }, { "epoch": 4.187192118226601, "grad_norm": 0.6119738221168518, "learning_rate": 0.000260364238410596, "loss": 0.1762, "step": 1700 }, { "epoch": 4.187192118226601, "eval_loss": 0.13134504854679108, "eval_runtime": 16.468, "eval_samples_per_second": 11.173, "eval_steps_per_second": 2.793, "eval_wer": 0.20031483667847305, "step": 1700 }, { "epoch": 4.433497536945813, "grad_norm": 0.7769166827201843, "learning_rate": 0.0002578807947019867, "loss": 0.1718, "step": 1800 }, { "epoch": 4.433497536945813, "eval_loss": 0.1275225579738617, "eval_runtime": 16.422, "eval_samples_per_second": 11.204, "eval_steps_per_second": 2.801, "eval_wer": 0.19323101141282958, "step": 1800 }, { "epoch": 4.679802955665025, "grad_norm": 0.7259223461151123, "learning_rate": 0.00025539735099337746, "loss": 0.1595, "step": 1900 }, { "epoch": 4.679802955665025, "eval_loss": 0.1275661438703537, "eval_runtime": 16.3823, "eval_samples_per_second": 11.232, "eval_steps_per_second": 2.808, "eval_wer": 0.1951987406532861, "step": 1900 }, { "epoch": 4.926108374384237, "grad_norm": 0.5679446458816528, "learning_rate": 0.0002529139072847682, "loss": 0.1811, "step": 2000 }, { "epoch": 4.926108374384237, "eval_loss": 0.12904822826385498, "eval_runtime": 16.3388, "eval_samples_per_second": 11.262, "eval_steps_per_second": 2.815, "eval_wer": 0.19834710743801653, "step": 2000 }, { "epoch": 5.172413793103448, "grad_norm": 0.6480135917663574, "learning_rate": 0.00025043046357615896, "loss": 0.1477, "step": 2100 }, { "epoch": 5.172413793103448, "eval_loss": 0.12981154024600983, "eval_runtime": 16.4531, "eval_samples_per_second": 11.183, "eval_steps_per_second": 2.796, "eval_wer": 0.1951987406532861, "step": 2100 }, { "epoch": 5.172413793103448, "step": 2100, "total_flos": 7.341751672651536e+18, "train_loss": 0.5011415631430489, "train_runtime": 1836.4834, "train_samples_per_second": 26.529, "train_steps_per_second": 6.632 } ], "logging_steps": 100, "max_steps": 12180, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 400, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 2 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7.341751672651536e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }