mms-1b-bemgen-male-model-test / trainer_state.json
csikasote's picture
End of training
a953690 verified
{
"best_metric": 0.30534103512763977,
"best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-bemgen-male-model-test/checkpoint-2300",
"epoch": 2.688728024819028,
"eval_steps": 100,
"global_step": 2600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.10341261633919338,
"grad_norm": 3.1888983249664307,
"learning_rate": 0.00028799999999999995,
"loss": 6.9809,
"step": 100
},
{
"epoch": 0.10341261633919338,
"eval_loss": 1.3138622045516968,
"eval_runtime": 31.0692,
"eval_samples_per_second": 15.803,
"eval_steps_per_second": 3.959,
"eval_wer": 0.9957016978293574,
"step": 100
},
{
"epoch": 0.20682523267838676,
"grad_norm": 2.4408092498779297,
"learning_rate": 0.0002990038049117952,
"loss": 0.745,
"step": 200
},
{
"epoch": 0.20682523267838676,
"eval_loss": 0.4297034740447998,
"eval_runtime": 31.0581,
"eval_samples_per_second": 15.809,
"eval_steps_per_second": 3.96,
"eval_wer": 0.5882226520524393,
"step": 200
},
{
"epoch": 0.31023784901758017,
"grad_norm": 1.8350099325180054,
"learning_rate": 0.0002979661016949152,
"loss": 0.5423,
"step": 300
},
{
"epoch": 0.31023784901758017,
"eval_loss": 0.3886265754699707,
"eval_runtime": 30.9037,
"eval_samples_per_second": 15.888,
"eval_steps_per_second": 3.98,
"eval_wer": 0.5643670750053729,
"step": 300
},
{
"epoch": 0.4136504653567735,
"grad_norm": 4.089741230010986,
"learning_rate": 0.00029692839847803525,
"loss": 0.539,
"step": 400
},
{
"epoch": 0.4136504653567735,
"eval_loss": 0.36830800771713257,
"eval_runtime": 30.9142,
"eval_samples_per_second": 15.883,
"eval_steps_per_second": 3.979,
"eval_wer": 0.5448098001289491,
"step": 400
},
{
"epoch": 0.5170630816959669,
"grad_norm": 2.093287706375122,
"learning_rate": 0.0002958906952611553,
"loss": 0.5277,
"step": 500
},
{
"epoch": 0.5170630816959669,
"eval_loss": 0.3528771698474884,
"eval_runtime": 31.284,
"eval_samples_per_second": 15.695,
"eval_steps_per_second": 3.932,
"eval_wer": 0.508274231678487,
"step": 500
},
{
"epoch": 0.6204756980351603,
"grad_norm": 1.6442524194717407,
"learning_rate": 0.0002948529920442753,
"loss": 0.4708,
"step": 600
},
{
"epoch": 0.6204756980351603,
"eval_loss": 0.3492579162120819,
"eval_runtime": 31.108,
"eval_samples_per_second": 15.784,
"eval_steps_per_second": 3.954,
"eval_wer": 0.4996776273372018,
"step": 600
},
{
"epoch": 0.7238883143743536,
"grad_norm": 2.5362725257873535,
"learning_rate": 0.00029381528882739533,
"loss": 0.4889,
"step": 700
},
{
"epoch": 0.7238883143743536,
"eval_loss": 0.3467372953891754,
"eval_runtime": 31.1038,
"eval_samples_per_second": 15.786,
"eval_steps_per_second": 3.954,
"eval_wer": 0.5095637223296797,
"step": 700
},
{
"epoch": 0.827300930713547,
"grad_norm": 4.026537895202637,
"learning_rate": 0.00029277758561051535,
"loss": 0.4793,
"step": 800
},
{
"epoch": 0.827300930713547,
"eval_loss": 0.34066715836524963,
"eval_runtime": 30.9432,
"eval_samples_per_second": 15.868,
"eval_steps_per_second": 3.975,
"eval_wer": 0.48183967332903505,
"step": 800
},
{
"epoch": 0.9307135470527405,
"grad_norm": 4.126669406890869,
"learning_rate": 0.0002917398823936354,
"loss": 0.469,
"step": 900
},
{
"epoch": 0.9307135470527405,
"eval_loss": 0.3455369174480438,
"eval_runtime": 31.0926,
"eval_samples_per_second": 15.792,
"eval_steps_per_second": 3.956,
"eval_wer": 0.49580915538362347,
"step": 900
},
{
"epoch": 1.0341261633919339,
"grad_norm": 1.1861900091171265,
"learning_rate": 0.0002907021791767554,
"loss": 0.4407,
"step": 1000
},
{
"epoch": 1.0341261633919339,
"eval_loss": 0.3328763544559479,
"eval_runtime": 31.2336,
"eval_samples_per_second": 15.72,
"eval_steps_per_second": 3.938,
"eval_wer": 0.473457984096282,
"step": 1000
},
{
"epoch": 1.1375387797311272,
"grad_norm": 1.2025549411773682,
"learning_rate": 0.00028967485299204426,
"loss": 0.4524,
"step": 1100
},
{
"epoch": 1.1375387797311272,
"eval_loss": 0.32889580726623535,
"eval_runtime": 31.3882,
"eval_samples_per_second": 15.643,
"eval_steps_per_second": 3.919,
"eval_wer": 0.4878572963679347,
"step": 1100
},
{
"epoch": 1.2409513960703205,
"grad_norm": 1.4938141107559204,
"learning_rate": 0.0002886371497751643,
"loss": 0.4416,
"step": 1200
},
{
"epoch": 1.2409513960703205,
"eval_loss": 0.327963262796402,
"eval_runtime": 30.9988,
"eval_samples_per_second": 15.839,
"eval_steps_per_second": 3.968,
"eval_wer": 0.4910810229959166,
"step": 1200
},
{
"epoch": 1.344364012409514,
"grad_norm": 1.22685706615448,
"learning_rate": 0.0002875994465582843,
"loss": 0.4599,
"step": 1300
},
{
"epoch": 1.344364012409514,
"eval_loss": 0.3284839391708374,
"eval_runtime": 31.2403,
"eval_samples_per_second": 15.717,
"eval_steps_per_second": 3.937,
"eval_wer": 0.4764667956157318,
"step": 1300
},
{
"epoch": 1.4477766287487073,
"grad_norm": 1.3746376037597656,
"learning_rate": 0.00028656174334140434,
"loss": 0.4739,
"step": 1400
},
{
"epoch": 1.4477766287487073,
"eval_loss": 0.3221452236175537,
"eval_runtime": 31.2054,
"eval_samples_per_second": 15.734,
"eval_steps_per_second": 3.942,
"eval_wer": 0.4693745970341715,
"step": 1400
},
{
"epoch": 1.5511892450879006,
"grad_norm": 1.6073061227798462,
"learning_rate": 0.00028552404012452436,
"loss": 0.4466,
"step": 1500
},
{
"epoch": 1.5511892450879006,
"eval_loss": 0.3196486234664917,
"eval_runtime": 31.0094,
"eval_samples_per_second": 15.834,
"eval_steps_per_second": 3.967,
"eval_wer": 0.4588437567160971,
"step": 1500
},
{
"epoch": 1.654601861427094,
"grad_norm": 1.1827540397644043,
"learning_rate": 0.0002844967139398132,
"loss": 0.4483,
"step": 1600
},
{
"epoch": 1.654601861427094,
"eval_loss": 0.3144252896308899,
"eval_runtime": 31.0445,
"eval_samples_per_second": 15.816,
"eval_steps_per_second": 3.962,
"eval_wer": 0.4526112185686654,
"step": 1600
},
{
"epoch": 1.7580144777662876,
"grad_norm": 1.335554599761963,
"learning_rate": 0.0002834590107229332,
"loss": 0.4543,
"step": 1700
},
{
"epoch": 1.7580144777662876,
"eval_loss": 0.31697967648506165,
"eval_runtime": 31.4988,
"eval_samples_per_second": 15.588,
"eval_steps_per_second": 3.905,
"eval_wer": 0.4528261336771975,
"step": 1700
},
{
"epoch": 1.861427094105481,
"grad_norm": 2.555410385131836,
"learning_rate": 0.00028242130750605327,
"loss": 0.4537,
"step": 1800
},
{
"epoch": 1.861427094105481,
"eval_loss": 0.3140595555305481,
"eval_runtime": 31.3179,
"eval_samples_per_second": 15.678,
"eval_steps_per_second": 3.927,
"eval_wer": 0.4521813883516011,
"step": 1800
},
{
"epoch": 1.9648397104446742,
"grad_norm": 1.150947093963623,
"learning_rate": 0.0002813836042891733,
"loss": 0.4293,
"step": 1900
},
{
"epoch": 1.9648397104446742,
"eval_loss": 0.31063735485076904,
"eval_runtime": 31.1712,
"eval_samples_per_second": 15.752,
"eval_steps_per_second": 3.946,
"eval_wer": 0.44530410487857297,
"step": 1900
},
{
"epoch": 2.0682523267838677,
"grad_norm": 0.674537181854248,
"learning_rate": 0.00028034590107229326,
"loss": 0.4457,
"step": 2000
},
{
"epoch": 2.0682523267838677,
"eval_loss": 0.3134273290634155,
"eval_runtime": 31.1408,
"eval_samples_per_second": 15.767,
"eval_steps_per_second": 3.95,
"eval_wer": 0.4650762948635289,
"step": 2000
},
{
"epoch": 2.1716649431230612,
"grad_norm": 0.9923437237739563,
"learning_rate": 0.00027930819785541334,
"loss": 0.4214,
"step": 2100
},
{
"epoch": 2.1716649431230612,
"eval_loss": 0.3119480609893799,
"eval_runtime": 31.4395,
"eval_samples_per_second": 15.617,
"eval_steps_per_second": 3.912,
"eval_wer": 0.4543305394369224,
"step": 2100
},
{
"epoch": 2.2750775594622543,
"grad_norm": 0.9173412919044495,
"learning_rate": 0.00027827049463853337,
"loss": 0.4103,
"step": 2200
},
{
"epoch": 2.2750775594622543,
"eval_loss": 0.3089054822921753,
"eval_runtime": 31.248,
"eval_samples_per_second": 15.713,
"eval_steps_per_second": 3.936,
"eval_wer": 0.43907156673114117,
"step": 2200
},
{
"epoch": 2.378490175801448,
"grad_norm": 3.1410484313964844,
"learning_rate": 0.0002772327914216534,
"loss": 0.407,
"step": 2300
},
{
"epoch": 2.378490175801448,
"eval_loss": 0.30534103512763977,
"eval_runtime": 31.5782,
"eval_samples_per_second": 15.549,
"eval_steps_per_second": 3.895,
"eval_wer": 0.43305394369224154,
"step": 2300
},
{
"epoch": 2.481902792140641,
"grad_norm": 1.2900283336639404,
"learning_rate": 0.0002761950882047734,
"loss": 0.4314,
"step": 2400
},
{
"epoch": 2.481902792140641,
"eval_loss": 0.30588600039482117,
"eval_runtime": 31.1067,
"eval_samples_per_second": 15.784,
"eval_steps_per_second": 3.954,
"eval_wer": 0.433698689017838,
"step": 2400
},
{
"epoch": 2.5853154084798344,
"grad_norm": 1.6208014488220215,
"learning_rate": 0.00027515738498789345,
"loss": 0.4144,
"step": 2500
},
{
"epoch": 2.5853154084798344,
"eval_loss": 0.30535265803337097,
"eval_runtime": 31.4136,
"eval_samples_per_second": 15.63,
"eval_steps_per_second": 3.915,
"eval_wer": 0.4382119062970127,
"step": 2500
},
{
"epoch": 2.688728024819028,
"grad_norm": 1.7971055507659912,
"learning_rate": 0.0002741196817710135,
"loss": 0.4099,
"step": 2600
},
{
"epoch": 2.688728024819028,
"eval_loss": 0.3059903085231781,
"eval_runtime": 31.3346,
"eval_samples_per_second": 15.67,
"eval_steps_per_second": 3.925,
"eval_wer": 0.4446593595529766,
"step": 2600
},
{
"epoch": 2.688728024819028,
"step": 2600,
"total_flos": 6.22653785519667e+18,
"train_loss": 0.718606931246244,
"train_runtime": 2586.1395,
"train_samples_per_second": 44.847,
"train_steps_per_second": 11.217
}
],
"logging_steps": 100,
"max_steps": 29010,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 400,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 1
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 6.22653785519667e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}