{
  "best_metric": 0.30534103512763977,
  "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-bemgen-male-model-test/checkpoint-2300",
  "epoch": 2.688728024819028,
  "eval_steps": 100,
  "global_step": 2600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.10341261633919338,
      "grad_norm": 3.1888983249664307,
      "learning_rate": 0.00028799999999999995,
      "loss": 6.9809,
      "step": 100
    },
    {
      "epoch": 0.10341261633919338,
      "eval_loss": 1.3138622045516968,
      "eval_runtime": 31.0692,
      "eval_samples_per_second": 15.803,
      "eval_steps_per_second": 3.959,
      "eval_wer": 0.9957016978293574,
      "step": 100
    },
    {
      "epoch": 0.20682523267838676,
      "grad_norm": 2.4408092498779297,
      "learning_rate": 0.0002990038049117952,
      "loss": 0.745,
      "step": 200
    },
    {
      "epoch": 0.20682523267838676,
      "eval_loss": 0.4297034740447998,
      "eval_runtime": 31.0581,
      "eval_samples_per_second": 15.809,
      "eval_steps_per_second": 3.96,
      "eval_wer": 0.5882226520524393,
      "step": 200
    },
    {
      "epoch": 0.31023784901758017,
      "grad_norm": 1.8350099325180054,
      "learning_rate": 0.0002979661016949152,
      "loss": 0.5423,
      "step": 300
    },
    {
      "epoch": 0.31023784901758017,
      "eval_loss": 0.3886265754699707,
      "eval_runtime": 30.9037,
      "eval_samples_per_second": 15.888,
      "eval_steps_per_second": 3.98,
      "eval_wer": 0.5643670750053729,
      "step": 300
    },
    {
      "epoch": 0.4136504653567735,
      "grad_norm": 4.089741230010986,
      "learning_rate": 0.00029692839847803525,
      "loss": 0.539,
      "step": 400
    },
    {
      "epoch": 0.4136504653567735,
      "eval_loss": 0.36830800771713257,
      "eval_runtime": 30.9142,
      "eval_samples_per_second": 15.883,
      "eval_steps_per_second": 3.979,
      "eval_wer": 0.5448098001289491,
      "step": 400
    },
    {
      "epoch": 0.5170630816959669,
      "grad_norm": 2.093287706375122,
      "learning_rate": 0.0002958906952611553,
      "loss": 0.5277,
      "step": 500
    },
    {
      "epoch": 0.5170630816959669,
      "eval_loss": 0.3528771698474884,
      "eval_runtime": 31.284,
      "eval_samples_per_second": 15.695,
      "eval_steps_per_second": 3.932,
      "eval_wer": 0.508274231678487,
      "step": 500
    },
    {
      "epoch": 0.6204756980351603,
      "grad_norm": 1.6442524194717407,
      "learning_rate": 0.0002948529920442753,
      "loss": 0.4708,
      "step": 600
    },
    {
      "epoch": 0.6204756980351603,
      "eval_loss": 0.3492579162120819,
      "eval_runtime": 31.108,
      "eval_samples_per_second": 15.784,
      "eval_steps_per_second": 3.954,
      "eval_wer": 0.4996776273372018,
      "step": 600
    },
    {
      "epoch": 0.7238883143743536,
      "grad_norm": 2.5362725257873535,
      "learning_rate": 0.00029381528882739533,
      "loss": 0.4889,
      "step": 700
    },
    {
      "epoch": 0.7238883143743536,
      "eval_loss": 0.3467372953891754,
      "eval_runtime": 31.1038,
      "eval_samples_per_second": 15.786,
      "eval_steps_per_second": 3.954,
      "eval_wer": 0.5095637223296797,
      "step": 700
    },
    {
      "epoch": 0.827300930713547,
      "grad_norm": 4.026537895202637,
      "learning_rate": 0.00029277758561051535,
      "loss": 0.4793,
      "step": 800
    },
    {
      "epoch": 0.827300930713547,
      "eval_loss": 0.34066715836524963,
      "eval_runtime": 30.9432,
      "eval_samples_per_second": 15.868,
      "eval_steps_per_second": 3.975,
      "eval_wer": 0.48183967332903505,
      "step": 800
    },
    {
      "epoch": 0.9307135470527405,
      "grad_norm": 4.126669406890869,
      "learning_rate": 0.0002917398823936354,
      "loss": 0.469,
      "step": 900
    },
    {
      "epoch": 0.9307135470527405,
      "eval_loss": 0.3455369174480438,
      "eval_runtime": 31.0926,
      "eval_samples_per_second": 15.792,
      "eval_steps_per_second": 3.956,
      "eval_wer": 0.49580915538362347,
      "step": 900
    },
    {
      "epoch": 1.0341261633919339,
      "grad_norm": 1.1861900091171265,
      "learning_rate": 0.0002907021791767554,
      "loss": 0.4407,
      "step": 1000
    },
    {
      "epoch": 1.0341261633919339,
      "eval_loss": 0.3328763544559479,
      "eval_runtime": 31.2336,
      "eval_samples_per_second": 15.72,
      "eval_steps_per_second": 3.938,
      "eval_wer": 0.473457984096282,
      "step": 1000
    },
    {
      "epoch": 1.1375387797311272,
      "grad_norm": 1.2025549411773682,
      "learning_rate": 0.00028967485299204426,
      "loss": 0.4524,
      "step": 1100
    },
    {
      "epoch": 1.1375387797311272,
      "eval_loss": 0.32889580726623535,
      "eval_runtime": 31.3882,
      "eval_samples_per_second": 15.643,
      "eval_steps_per_second": 3.919,
      "eval_wer": 0.4878572963679347,
      "step": 1100
    },
    {
      "epoch": 1.2409513960703205,
      "grad_norm": 1.4938141107559204,
      "learning_rate": 0.0002886371497751643,
      "loss": 0.4416,
      "step": 1200
    },
    {
      "epoch": 1.2409513960703205,
      "eval_loss": 0.327963262796402,
      "eval_runtime": 30.9988,
      "eval_samples_per_second": 15.839,
      "eval_steps_per_second": 3.968,
      "eval_wer": 0.4910810229959166,
      "step": 1200
    },
    {
      "epoch": 1.344364012409514,
      "grad_norm": 1.22685706615448,
      "learning_rate": 0.0002875994465582843,
      "loss": 0.4599,
      "step": 1300
    },
    {
      "epoch": 1.344364012409514,
      "eval_loss": 0.3284839391708374,
      "eval_runtime": 31.2403,
      "eval_samples_per_second": 15.717,
      "eval_steps_per_second": 3.937,
      "eval_wer": 0.4764667956157318,
      "step": 1300
    },
    {
      "epoch": 1.4477766287487073,
      "grad_norm": 1.3746376037597656,
      "learning_rate": 0.00028656174334140434,
      "loss": 0.4739,
      "step": 1400
    },
    {
      "epoch": 1.4477766287487073,
      "eval_loss": 0.3221452236175537,
      "eval_runtime": 31.2054,
      "eval_samples_per_second": 15.734,
      "eval_steps_per_second": 3.942,
      "eval_wer": 0.4693745970341715,
      "step": 1400
    },
    {
      "epoch": 1.5511892450879006,
      "grad_norm": 1.6073061227798462,
      "learning_rate": 0.00028552404012452436,
      "loss": 0.4466,
      "step": 1500
    },
    {
      "epoch": 1.5511892450879006,
      "eval_loss": 0.3196486234664917,
      "eval_runtime": 31.0094,
      "eval_samples_per_second": 15.834,
      "eval_steps_per_second": 3.967,
      "eval_wer": 0.4588437567160971,
      "step": 1500
    },
    {
      "epoch": 1.654601861427094,
      "grad_norm": 1.1827540397644043,
      "learning_rate": 0.0002844967139398132,
      "loss": 0.4483,
      "step": 1600
    },
    {
      "epoch": 1.654601861427094,
      "eval_loss": 0.3144252896308899,
      "eval_runtime": 31.0445,
      "eval_samples_per_second": 15.816,
      "eval_steps_per_second": 3.962,
      "eval_wer": 0.4526112185686654,
      "step": 1600
    },
    {
      "epoch": 1.7580144777662876,
      "grad_norm": 1.335554599761963,
      "learning_rate": 0.0002834590107229332,
      "loss": 0.4543,
      "step": 1700
    },
    {
      "epoch": 1.7580144777662876,
      "eval_loss": 0.31697967648506165,
      "eval_runtime": 31.4988,
      "eval_samples_per_second": 15.588,
      "eval_steps_per_second": 3.905,
      "eval_wer": 0.4528261336771975,
      "step": 1700
    },
    {
      "epoch": 1.861427094105481,
      "grad_norm": 2.555410385131836,
      "learning_rate": 0.00028242130750605327,
      "loss": 0.4537,
      "step": 1800
    },
    {
      "epoch": 1.861427094105481,
      "eval_loss": 0.3140595555305481,
      "eval_runtime": 31.3179,
      "eval_samples_per_second": 15.678,
      "eval_steps_per_second": 3.927,
      "eval_wer": 0.4521813883516011,
      "step": 1800
    },
    {
      "epoch": 1.9648397104446742,
      "grad_norm": 1.150947093963623,
      "learning_rate": 0.0002813836042891733,
      "loss": 0.4293,
      "step": 1900
    },
    {
      "epoch": 1.9648397104446742,
      "eval_loss": 0.31063735485076904,
      "eval_runtime": 31.1712,
      "eval_samples_per_second": 15.752,
      "eval_steps_per_second": 3.946,
      "eval_wer": 0.44530410487857297,
      "step": 1900
    },
    {
      "epoch": 2.0682523267838677,
      "grad_norm": 0.674537181854248,
      "learning_rate": 0.00028034590107229326,
      "loss": 0.4457,
      "step": 2000
    },
    {
      "epoch": 2.0682523267838677,
      "eval_loss": 0.3134273290634155,
      "eval_runtime": 31.1408,
      "eval_samples_per_second": 15.767,
      "eval_steps_per_second": 3.95,
      "eval_wer": 0.4650762948635289,
      "step": 2000
    },
    {
      "epoch": 2.1716649431230612,
      "grad_norm": 0.9923437237739563,
      "learning_rate": 0.00027930819785541334,
      "loss": 0.4214,
      "step": 2100
    },
    {
      "epoch": 2.1716649431230612,
      "eval_loss": 0.3119480609893799,
      "eval_runtime": 31.4395,
      "eval_samples_per_second": 15.617,
      "eval_steps_per_second": 3.912,
      "eval_wer": 0.4543305394369224,
      "step": 2100
    },
    {
      "epoch": 2.2750775594622543,
      "grad_norm": 0.9173412919044495,
      "learning_rate": 0.00027827049463853337,
      "loss": 0.4103,
      "step": 2200
    },
    {
      "epoch": 2.2750775594622543,
      "eval_loss": 0.3089054822921753,
      "eval_runtime": 31.248,
      "eval_samples_per_second": 15.713,
      "eval_steps_per_second": 3.936,
      "eval_wer": 0.43907156673114117,
      "step": 2200
    },
    {
      "epoch": 2.378490175801448,
      "grad_norm": 3.1410484313964844,
      "learning_rate": 0.0002772327914216534,
      "loss": 0.407,
      "step": 2300
    },
    {
      "epoch": 2.378490175801448,
      "eval_loss": 0.30534103512763977,
      "eval_runtime": 31.5782,
      "eval_samples_per_second": 15.549,
      "eval_steps_per_second": 3.895,
      "eval_wer": 0.43305394369224154,
      "step": 2300
    },
    {
      "epoch": 2.481902792140641,
      "grad_norm": 1.2900283336639404,
      "learning_rate": 0.0002761950882047734,
      "loss": 0.4314,
      "step": 2400
    },
    {
      "epoch": 2.481902792140641,
      "eval_loss": 0.30588600039482117,
      "eval_runtime": 31.1067,
      "eval_samples_per_second": 15.784,
      "eval_steps_per_second": 3.954,
      "eval_wer": 0.433698689017838,
      "step": 2400
    },
    {
      "epoch": 2.5853154084798344,
      "grad_norm": 1.6208014488220215,
      "learning_rate": 0.00027515738498789345,
      "loss": 0.4144,
      "step": 2500
    },
    {
      "epoch": 2.5853154084798344,
      "eval_loss": 0.30535265803337097,
      "eval_runtime": 31.4136,
      "eval_samples_per_second": 15.63,
      "eval_steps_per_second": 3.915,
      "eval_wer": 0.4382119062970127,
      "step": 2500
    },
    {
      "epoch": 2.688728024819028,
      "grad_norm": 1.7971055507659912,
      "learning_rate": 0.0002741196817710135,
      "loss": 0.4099,
      "step": 2600
    },
    {
      "epoch": 2.688728024819028,
      "eval_loss": 0.3059903085231781,
      "eval_runtime": 31.3346,
      "eval_samples_per_second": 15.67,
      "eval_steps_per_second": 3.925,
      "eval_wer": 0.4446593595529766,
      "step": 2600
    },
    {
      "epoch": 2.688728024819028,
      "step": 2600,
      "total_flos": 6.22653785519667e+18,
      "train_loss": 0.718606931246244,
      "train_runtime": 2586.1395,
      "train_samples_per_second": 44.847,
      "train_steps_per_second": 11.217
    }
  ],
  "logging_steps": 100,
  "max_steps": 29010,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 400,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 1
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 6.22653785519667e+18,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}