{ "best_metric": 0.30534103512763977, "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-bemgen-male-model-test/checkpoint-2300", "epoch": 2.688728024819028, "eval_steps": 100, "global_step": 2600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.10341261633919338, "grad_norm": 3.1888983249664307, "learning_rate": 0.00028799999999999995, "loss": 6.9809, "step": 100 }, { "epoch": 0.10341261633919338, "eval_loss": 1.3138622045516968, "eval_runtime": 31.0692, "eval_samples_per_second": 15.803, "eval_steps_per_second": 3.959, "eval_wer": 0.9957016978293574, "step": 100 }, { "epoch": 0.20682523267838676, "grad_norm": 2.4408092498779297, "learning_rate": 0.0002990038049117952, "loss": 0.745, "step": 200 }, { "epoch": 0.20682523267838676, "eval_loss": 0.4297034740447998, "eval_runtime": 31.0581, "eval_samples_per_second": 15.809, "eval_steps_per_second": 3.96, "eval_wer": 0.5882226520524393, "step": 200 }, { "epoch": 0.31023784901758017, "grad_norm": 1.8350099325180054, "learning_rate": 0.0002979661016949152, "loss": 0.5423, "step": 300 }, { "epoch": 0.31023784901758017, "eval_loss": 0.3886265754699707, "eval_runtime": 30.9037, "eval_samples_per_second": 15.888, "eval_steps_per_second": 3.98, "eval_wer": 0.5643670750053729, "step": 300 }, { "epoch": 0.4136504653567735, "grad_norm": 4.089741230010986, "learning_rate": 0.00029692839847803525, "loss": 0.539, "step": 400 }, { "epoch": 0.4136504653567735, "eval_loss": 0.36830800771713257, "eval_runtime": 30.9142, "eval_samples_per_second": 15.883, "eval_steps_per_second": 3.979, "eval_wer": 0.5448098001289491, "step": 400 }, { "epoch": 0.5170630816959669, "grad_norm": 2.093287706375122, "learning_rate": 0.0002958906952611553, "loss": 0.5277, "step": 500 }, { "epoch": 0.5170630816959669, "eval_loss": 0.3528771698474884, "eval_runtime": 31.284, "eval_samples_per_second": 15.695, "eval_steps_per_second": 3.932, "eval_wer": 0.508274231678487, "step": 500 }, { "epoch": 0.6204756980351603, "grad_norm": 1.6442524194717407, "learning_rate": 0.0002948529920442753, "loss": 0.4708, "step": 600 }, { "epoch": 0.6204756980351603, "eval_loss": 0.3492579162120819, "eval_runtime": 31.108, "eval_samples_per_second": 15.784, "eval_steps_per_second": 3.954, "eval_wer": 0.4996776273372018, "step": 600 }, { "epoch": 0.7238883143743536, "grad_norm": 2.5362725257873535, "learning_rate": 0.00029381528882739533, "loss": 0.4889, "step": 700 }, { "epoch": 0.7238883143743536, "eval_loss": 0.3467372953891754, "eval_runtime": 31.1038, "eval_samples_per_second": 15.786, "eval_steps_per_second": 3.954, "eval_wer": 0.5095637223296797, "step": 700 }, { "epoch": 0.827300930713547, "grad_norm": 4.026537895202637, "learning_rate": 0.00029277758561051535, "loss": 0.4793, "step": 800 }, { "epoch": 0.827300930713547, "eval_loss": 0.34066715836524963, "eval_runtime": 30.9432, "eval_samples_per_second": 15.868, "eval_steps_per_second": 3.975, "eval_wer": 0.48183967332903505, "step": 800 }, { "epoch": 0.9307135470527405, "grad_norm": 4.126669406890869, "learning_rate": 0.0002917398823936354, "loss": 0.469, "step": 900 }, { "epoch": 0.9307135470527405, "eval_loss": 0.3455369174480438, "eval_runtime": 31.0926, "eval_samples_per_second": 15.792, "eval_steps_per_second": 3.956, "eval_wer": 0.49580915538362347, "step": 900 }, { "epoch": 1.0341261633919339, "grad_norm": 1.1861900091171265, "learning_rate": 0.0002907021791767554, "loss": 0.4407, "step": 1000 }, { "epoch": 1.0341261633919339, "eval_loss": 0.3328763544559479, "eval_runtime": 31.2336, "eval_samples_per_second": 15.72, "eval_steps_per_second": 3.938, "eval_wer": 0.473457984096282, "step": 1000 }, { "epoch": 1.1375387797311272, "grad_norm": 1.2025549411773682, "learning_rate": 0.00028967485299204426, "loss": 0.4524, "step": 1100 }, { "epoch": 1.1375387797311272, "eval_loss": 0.32889580726623535, "eval_runtime": 31.3882, "eval_samples_per_second": 15.643, "eval_steps_per_second": 3.919, "eval_wer": 0.4878572963679347, "step": 1100 }, { "epoch": 1.2409513960703205, "grad_norm": 1.4938141107559204, "learning_rate": 0.0002886371497751643, "loss": 0.4416, "step": 1200 }, { "epoch": 1.2409513960703205, "eval_loss": 0.327963262796402, "eval_runtime": 30.9988, "eval_samples_per_second": 15.839, "eval_steps_per_second": 3.968, "eval_wer": 0.4910810229959166, "step": 1200 }, { "epoch": 1.344364012409514, "grad_norm": 1.22685706615448, "learning_rate": 0.0002875994465582843, "loss": 0.4599, "step": 1300 }, { "epoch": 1.344364012409514, "eval_loss": 0.3284839391708374, "eval_runtime": 31.2403, "eval_samples_per_second": 15.717, "eval_steps_per_second": 3.937, "eval_wer": 0.4764667956157318, "step": 1300 }, { "epoch": 1.4477766287487073, "grad_norm": 1.3746376037597656, "learning_rate": 0.00028656174334140434, "loss": 0.4739, "step": 1400 }, { "epoch": 1.4477766287487073, "eval_loss": 0.3221452236175537, "eval_runtime": 31.2054, "eval_samples_per_second": 15.734, "eval_steps_per_second": 3.942, "eval_wer": 0.4693745970341715, "step": 1400 }, { "epoch": 1.5511892450879006, "grad_norm": 1.6073061227798462, "learning_rate": 0.00028552404012452436, "loss": 0.4466, "step": 1500 }, { "epoch": 1.5511892450879006, "eval_loss": 0.3196486234664917, "eval_runtime": 31.0094, "eval_samples_per_second": 15.834, "eval_steps_per_second": 3.967, "eval_wer": 0.4588437567160971, "step": 1500 }, { "epoch": 1.654601861427094, "grad_norm": 1.1827540397644043, "learning_rate": 0.0002844967139398132, "loss": 0.4483, "step": 1600 }, { "epoch": 1.654601861427094, "eval_loss": 0.3144252896308899, "eval_runtime": 31.0445, "eval_samples_per_second": 15.816, "eval_steps_per_second": 3.962, "eval_wer": 0.4526112185686654, "step": 1600 }, { "epoch": 1.7580144777662876, "grad_norm": 1.335554599761963, "learning_rate": 0.0002834590107229332, "loss": 0.4543, "step": 1700 }, { "epoch": 1.7580144777662876, "eval_loss": 0.31697967648506165, "eval_runtime": 31.4988, "eval_samples_per_second": 15.588, "eval_steps_per_second": 3.905, "eval_wer": 0.4528261336771975, "step": 1700 }, { "epoch": 1.861427094105481, "grad_norm": 2.555410385131836, "learning_rate": 0.00028242130750605327, "loss": 0.4537, "step": 1800 }, { "epoch": 1.861427094105481, "eval_loss": 0.3140595555305481, "eval_runtime": 31.3179, "eval_samples_per_second": 15.678, "eval_steps_per_second": 3.927, "eval_wer": 0.4521813883516011, "step": 1800 }, { "epoch": 1.9648397104446742, "grad_norm": 1.150947093963623, "learning_rate": 0.0002813836042891733, "loss": 0.4293, "step": 1900 }, { "epoch": 1.9648397104446742, "eval_loss": 0.31063735485076904, "eval_runtime": 31.1712, "eval_samples_per_second": 15.752, "eval_steps_per_second": 3.946, "eval_wer": 0.44530410487857297, "step": 1900 }, { "epoch": 2.0682523267838677, "grad_norm": 0.674537181854248, "learning_rate": 0.00028034590107229326, "loss": 0.4457, "step": 2000 }, { "epoch": 2.0682523267838677, "eval_loss": 0.3134273290634155, "eval_runtime": 31.1408, "eval_samples_per_second": 15.767, "eval_steps_per_second": 3.95, "eval_wer": 0.4650762948635289, "step": 2000 }, { "epoch": 2.1716649431230612, "grad_norm": 0.9923437237739563, "learning_rate": 0.00027930819785541334, "loss": 0.4214, "step": 2100 }, { "epoch": 2.1716649431230612, "eval_loss": 0.3119480609893799, "eval_runtime": 31.4395, "eval_samples_per_second": 15.617, "eval_steps_per_second": 3.912, "eval_wer": 0.4543305394369224, "step": 2100 }, { "epoch": 2.2750775594622543, "grad_norm": 0.9173412919044495, "learning_rate": 0.00027827049463853337, "loss": 0.4103, "step": 2200 }, { "epoch": 2.2750775594622543, "eval_loss": 0.3089054822921753, "eval_runtime": 31.248, "eval_samples_per_second": 15.713, "eval_steps_per_second": 3.936, "eval_wer": 0.43907156673114117, "step": 2200 }, { "epoch": 2.378490175801448, "grad_norm": 3.1410484313964844, "learning_rate": 0.0002772327914216534, "loss": 0.407, "step": 2300 }, { "epoch": 2.378490175801448, "eval_loss": 0.30534103512763977, "eval_runtime": 31.5782, "eval_samples_per_second": 15.549, "eval_steps_per_second": 3.895, "eval_wer": 0.43305394369224154, "step": 2300 }, { "epoch": 2.481902792140641, "grad_norm": 1.2900283336639404, "learning_rate": 0.0002761950882047734, "loss": 0.4314, "step": 2400 }, { "epoch": 2.481902792140641, "eval_loss": 0.30588600039482117, "eval_runtime": 31.1067, "eval_samples_per_second": 15.784, "eval_steps_per_second": 3.954, "eval_wer": 0.433698689017838, "step": 2400 }, { "epoch": 2.5853154084798344, "grad_norm": 1.6208014488220215, "learning_rate": 0.00027515738498789345, "loss": 0.4144, "step": 2500 }, { "epoch": 2.5853154084798344, "eval_loss": 0.30535265803337097, "eval_runtime": 31.4136, "eval_samples_per_second": 15.63, "eval_steps_per_second": 3.915, "eval_wer": 0.4382119062970127, "step": 2500 }, { "epoch": 2.688728024819028, "grad_norm": 1.7971055507659912, "learning_rate": 0.0002741196817710135, "loss": 0.4099, "step": 2600 }, { "epoch": 2.688728024819028, "eval_loss": 0.3059903085231781, "eval_runtime": 31.3346, "eval_samples_per_second": 15.67, "eval_steps_per_second": 3.925, "eval_wer": 0.4446593595529766, "step": 2600 }, { "epoch": 2.688728024819028, "step": 2600, "total_flos": 6.22653785519667e+18, "train_loss": 0.718606931246244, "train_runtime": 2586.1395, "train_samples_per_second": 44.847, "train_steps_per_second": 11.217 } ], "logging_steps": 100, "max_steps": 29010, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 400, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 1 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.22653785519667e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }