mms-1b-nyagen-female-model / trainer_state.json
csikasote's picture
End of training
4f3ac18 verified
{
"best_metric": 0.21267759799957275,
"best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-nyagen-female-model/checkpoint-2100",
"epoch": 7.339449541284404,
"eval_steps": 100,
"global_step": 2400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.3058103975535168,
"grad_norm": 2.4750263690948486,
"learning_rate": 0.00028799999999999995,
"loss": 7.8679,
"step": 100
},
{
"epoch": 0.3058103975535168,
"eval_loss": 0.9127748608589172,
"eval_runtime": 15.6762,
"eval_samples_per_second": 10.207,
"eval_steps_per_second": 2.552,
"eval_wer": 0.6897243107769424,
"step": 100
},
{
"epoch": 0.6116207951070336,
"grad_norm": 1.507306694984436,
"learning_rate": 0.00029703398558187435,
"loss": 0.627,
"step": 200
},
{
"epoch": 0.6116207951070336,
"eval_loss": 0.3264550268650055,
"eval_runtime": 15.9193,
"eval_samples_per_second": 10.051,
"eval_steps_per_second": 2.513,
"eval_wer": 0.43859649122807015,
"step": 200
},
{
"epoch": 0.9174311926605505,
"grad_norm": 1.2747225761413574,
"learning_rate": 0.0002939443872296601,
"loss": 0.5004,
"step": 300
},
{
"epoch": 0.9174311926605505,
"eval_loss": 0.29508110880851746,
"eval_runtime": 15.668,
"eval_samples_per_second": 10.212,
"eval_steps_per_second": 2.553,
"eval_wer": 0.40401002506265665,
"step": 300
},
{
"epoch": 1.2232415902140672,
"grad_norm": 1.1947221755981445,
"learning_rate": 0.0002908547888774459,
"loss": 0.4611,
"step": 400
},
{
"epoch": 1.2232415902140672,
"eval_loss": 0.27579107880592346,
"eval_runtime": 15.5786,
"eval_samples_per_second": 10.271,
"eval_steps_per_second": 2.568,
"eval_wer": 0.3659147869674185,
"step": 400
},
{
"epoch": 1.529051987767584,
"grad_norm": 0.9117503762245178,
"learning_rate": 0.0002877651905252317,
"loss": 0.4455,
"step": 500
},
{
"epoch": 1.529051987767584,
"eval_loss": 0.266660213470459,
"eval_runtime": 15.6734,
"eval_samples_per_second": 10.208,
"eval_steps_per_second": 2.552,
"eval_wer": 0.3598997493734336,
"step": 500
},
{
"epoch": 1.834862385321101,
"grad_norm": 0.6930361390113831,
"learning_rate": 0.0002846755921730175,
"loss": 0.4142,
"step": 600
},
{
"epoch": 1.834862385321101,
"eval_loss": 0.26014643907546997,
"eval_runtime": 15.6292,
"eval_samples_per_second": 10.237,
"eval_steps_per_second": 2.559,
"eval_wer": 0.35137844611528823,
"step": 600
},
{
"epoch": 2.140672782874618,
"grad_norm": 1.517812967300415,
"learning_rate": 0.00028158599382080326,
"loss": 0.3706,
"step": 700
},
{
"epoch": 2.140672782874618,
"eval_loss": 0.25211504101753235,
"eval_runtime": 15.5542,
"eval_samples_per_second": 10.287,
"eval_steps_per_second": 2.572,
"eval_wer": 0.34786967418546366,
"step": 700
},
{
"epoch": 2.4464831804281344,
"grad_norm": 0.9740973114967346,
"learning_rate": 0.00027849639546858907,
"loss": 0.3867,
"step": 800
},
{
"epoch": 2.4464831804281344,
"eval_loss": 0.24611802399158478,
"eval_runtime": 15.6894,
"eval_samples_per_second": 10.198,
"eval_steps_per_second": 2.549,
"eval_wer": 0.31278195488721805,
"step": 800
},
{
"epoch": 2.7522935779816513,
"grad_norm": 2.278139114379883,
"learning_rate": 0.0002754067971163748,
"loss": 0.3537,
"step": 900
},
{
"epoch": 2.7522935779816513,
"eval_loss": 0.24492569267749786,
"eval_runtime": 15.6703,
"eval_samples_per_second": 10.21,
"eval_steps_per_second": 2.553,
"eval_wer": 0.3157894736842105,
"step": 900
},
{
"epoch": 3.058103975535168,
"grad_norm": 0.7251365780830383,
"learning_rate": 0.0002723171987641606,
"loss": 0.3821,
"step": 1000
},
{
"epoch": 3.058103975535168,
"eval_loss": 0.24130229651927948,
"eval_runtime": 15.7231,
"eval_samples_per_second": 10.176,
"eval_steps_per_second": 2.544,
"eval_wer": 0.2932330827067669,
"step": 1000
},
{
"epoch": 3.363914373088685,
"grad_norm": 10.20130729675293,
"learning_rate": 0.00026922760041194643,
"loss": 0.3626,
"step": 1100
},
{
"epoch": 3.363914373088685,
"eval_loss": 0.23597605526447296,
"eval_runtime": 15.6773,
"eval_samples_per_second": 10.206,
"eval_steps_per_second": 2.551,
"eval_wer": 0.3082706766917293,
"step": 1100
},
{
"epoch": 3.669724770642202,
"grad_norm": 0.775827944278717,
"learning_rate": 0.00026613800205973223,
"loss": 0.3312,
"step": 1200
},
{
"epoch": 3.669724770642202,
"eval_loss": 0.2335551679134369,
"eval_runtime": 15.7243,
"eval_samples_per_second": 10.175,
"eval_steps_per_second": 2.544,
"eval_wer": 0.29974937343358393,
"step": 1200
},
{
"epoch": 3.9755351681957185,
"grad_norm": 1.4778733253479004,
"learning_rate": 0.00026307929969104015,
"loss": 0.3322,
"step": 1300
},
{
"epoch": 3.9755351681957185,
"eval_loss": 0.22845473885536194,
"eval_runtime": 15.7244,
"eval_samples_per_second": 10.175,
"eval_steps_per_second": 2.544,
"eval_wer": 0.2967418546365915,
"step": 1300
},
{
"epoch": 4.281345565749236,
"grad_norm": 63.480228424072266,
"learning_rate": 0.0002599897013388259,
"loss": 0.3654,
"step": 1400
},
{
"epoch": 4.281345565749236,
"eval_loss": 0.22345833480358124,
"eval_runtime": 15.723,
"eval_samples_per_second": 10.176,
"eval_steps_per_second": 2.544,
"eval_wer": 0.28521303258145364,
"step": 1400
},
{
"epoch": 4.587155963302752,
"grad_norm": 0.6586478352546692,
"learning_rate": 0.0002569001029866117,
"loss": 0.3241,
"step": 1500
},
{
"epoch": 4.587155963302752,
"eval_loss": 0.21982760727405548,
"eval_runtime": 15.6138,
"eval_samples_per_second": 10.247,
"eval_steps_per_second": 2.562,
"eval_wer": 0.2807017543859649,
"step": 1500
},
{
"epoch": 4.892966360856269,
"grad_norm": 1.3852250576019287,
"learning_rate": 0.0002538105046343975,
"loss": 0.2908,
"step": 1600
},
{
"epoch": 4.892966360856269,
"eval_loss": 0.21671359241008759,
"eval_runtime": 15.6441,
"eval_samples_per_second": 10.228,
"eval_steps_per_second": 2.557,
"eval_wer": 0.27669172932330827,
"step": 1600
},
{
"epoch": 5.198776758409786,
"grad_norm": 1.687046766281128,
"learning_rate": 0.0002507209062821833,
"loss": 0.3299,
"step": 1700
},
{
"epoch": 5.198776758409786,
"eval_loss": 0.21697847545146942,
"eval_runtime": 15.6568,
"eval_samples_per_second": 10.219,
"eval_steps_per_second": 2.555,
"eval_wer": 0.2746867167919799,
"step": 1700
},
{
"epoch": 5.504587155963303,
"grad_norm": 1.117961049079895,
"learning_rate": 0.00024763130792996907,
"loss": 0.3128,
"step": 1800
},
{
"epoch": 5.504587155963303,
"eval_loss": 0.21468114852905273,
"eval_runtime": 15.7336,
"eval_samples_per_second": 10.169,
"eval_steps_per_second": 2.542,
"eval_wer": 0.268671679197995,
"step": 1800
},
{
"epoch": 5.81039755351682,
"grad_norm": 0.5044408440589905,
"learning_rate": 0.0002445417095777549,
"loss": 0.3094,
"step": 1900
},
{
"epoch": 5.81039755351682,
"eval_loss": 0.21403858065605164,
"eval_runtime": 15.6158,
"eval_samples_per_second": 10.246,
"eval_steps_per_second": 2.562,
"eval_wer": 0.27418546365914787,
"step": 1900
},
{
"epoch": 6.116207951070336,
"grad_norm": 0.6591036319732666,
"learning_rate": 0.00024145211122554065,
"loss": 0.309,
"step": 2000
},
{
"epoch": 6.116207951070336,
"eval_loss": 0.21581442654132843,
"eval_runtime": 15.7498,
"eval_samples_per_second": 10.159,
"eval_steps_per_second": 2.54,
"eval_wer": 0.27017543859649124,
"step": 2000
},
{
"epoch": 6.422018348623853,
"grad_norm": 1.306911826133728,
"learning_rate": 0.00023836251287332646,
"loss": 0.3075,
"step": 2100
},
{
"epoch": 6.422018348623853,
"eval_loss": 0.21267759799957275,
"eval_runtime": 15.7059,
"eval_samples_per_second": 10.187,
"eval_steps_per_second": 2.547,
"eval_wer": 0.26516290726817043,
"step": 2100
},
{
"epoch": 6.72782874617737,
"grad_norm": 4.904253005981445,
"learning_rate": 0.00023527291452111223,
"loss": 0.2823,
"step": 2200
},
{
"epoch": 6.72782874617737,
"eval_loss": 0.2155403196811676,
"eval_runtime": 15.7469,
"eval_samples_per_second": 10.161,
"eval_steps_per_second": 2.54,
"eval_wer": 0.2671679197994987,
"step": 2200
},
{
"epoch": 7.033639143730887,
"grad_norm": 1.5606273412704468,
"learning_rate": 0.00023221421215242018,
"loss": 0.3062,
"step": 2300
},
{
"epoch": 7.033639143730887,
"eval_loss": 0.21326474845409393,
"eval_runtime": 15.6903,
"eval_samples_per_second": 10.197,
"eval_steps_per_second": 2.549,
"eval_wer": 0.2556390977443609,
"step": 2300
},
{
"epoch": 7.339449541284404,
"grad_norm": 1.3775653839111328,
"learning_rate": 0.00022912461380020596,
"loss": 0.3012,
"step": 2400
},
{
"epoch": 7.339449541284404,
"eval_loss": 0.2165716141462326,
"eval_runtime": 15.6703,
"eval_samples_per_second": 10.21,
"eval_steps_per_second": 2.553,
"eval_wer": 0.25413533834586466,
"step": 2400
},
{
"epoch": 7.339449541284404,
"step": 2400,
"total_flos": 9.832669015298243e+18,
"train_loss": 0.6780771311124166,
"train_runtime": 2242.1481,
"train_samples_per_second": 17.461,
"train_steps_per_second": 4.375
}
],
"logging_steps": 100,
"max_steps": 9810,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 400,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 3
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9.832669015298243e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}