{ "best_metric": 0.21267759799957275, "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-nyagen-female-model/checkpoint-2100", "epoch": 7.339449541284404, "eval_steps": 100, "global_step": 2400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.3058103975535168, "grad_norm": 2.4750263690948486, "learning_rate": 0.00028799999999999995, "loss": 7.8679, "step": 100 }, { "epoch": 0.3058103975535168, "eval_loss": 0.9127748608589172, "eval_runtime": 15.6762, "eval_samples_per_second": 10.207, "eval_steps_per_second": 2.552, "eval_wer": 0.6897243107769424, "step": 100 }, { "epoch": 0.6116207951070336, "grad_norm": 1.507306694984436, "learning_rate": 0.00029703398558187435, "loss": 0.627, "step": 200 }, { "epoch": 0.6116207951070336, "eval_loss": 0.3264550268650055, "eval_runtime": 15.9193, "eval_samples_per_second": 10.051, "eval_steps_per_second": 2.513, "eval_wer": 0.43859649122807015, "step": 200 }, { "epoch": 0.9174311926605505, "grad_norm": 1.2747225761413574, "learning_rate": 0.0002939443872296601, "loss": 0.5004, "step": 300 }, { "epoch": 0.9174311926605505, "eval_loss": 0.29508110880851746, "eval_runtime": 15.668, "eval_samples_per_second": 10.212, "eval_steps_per_second": 2.553, "eval_wer": 0.40401002506265665, "step": 300 }, { "epoch": 1.2232415902140672, "grad_norm": 1.1947221755981445, "learning_rate": 0.0002908547888774459, "loss": 0.4611, "step": 400 }, { "epoch": 1.2232415902140672, "eval_loss": 0.27579107880592346, "eval_runtime": 15.5786, "eval_samples_per_second": 10.271, "eval_steps_per_second": 2.568, "eval_wer": 0.3659147869674185, "step": 400 }, { "epoch": 1.529051987767584, "grad_norm": 0.9117503762245178, "learning_rate": 0.0002877651905252317, "loss": 0.4455, "step": 500 }, { "epoch": 1.529051987767584, "eval_loss": 0.266660213470459, "eval_runtime": 15.6734, "eval_samples_per_second": 10.208, "eval_steps_per_second": 2.552, "eval_wer": 0.3598997493734336, "step": 500 }, { "epoch": 1.834862385321101, "grad_norm": 0.6930361390113831, "learning_rate": 0.0002846755921730175, "loss": 0.4142, "step": 600 }, { "epoch": 1.834862385321101, "eval_loss": 0.26014643907546997, "eval_runtime": 15.6292, "eval_samples_per_second": 10.237, "eval_steps_per_second": 2.559, "eval_wer": 0.35137844611528823, "step": 600 }, { "epoch": 2.140672782874618, "grad_norm": 1.517812967300415, "learning_rate": 0.00028158599382080326, "loss": 0.3706, "step": 700 }, { "epoch": 2.140672782874618, "eval_loss": 0.25211504101753235, "eval_runtime": 15.5542, "eval_samples_per_second": 10.287, "eval_steps_per_second": 2.572, "eval_wer": 0.34786967418546366, "step": 700 }, { "epoch": 2.4464831804281344, "grad_norm": 0.9740973114967346, "learning_rate": 0.00027849639546858907, "loss": 0.3867, "step": 800 }, { "epoch": 2.4464831804281344, "eval_loss": 0.24611802399158478, "eval_runtime": 15.6894, "eval_samples_per_second": 10.198, "eval_steps_per_second": 2.549, "eval_wer": 0.31278195488721805, "step": 800 }, { "epoch": 2.7522935779816513, "grad_norm": 2.278139114379883, "learning_rate": 0.0002754067971163748, "loss": 0.3537, "step": 900 }, { "epoch": 2.7522935779816513, "eval_loss": 0.24492569267749786, "eval_runtime": 15.6703, "eval_samples_per_second": 10.21, "eval_steps_per_second": 2.553, "eval_wer": 0.3157894736842105, "step": 900 }, { "epoch": 3.058103975535168, "grad_norm": 0.7251365780830383, "learning_rate": 0.0002723171987641606, "loss": 0.3821, "step": 1000 }, { "epoch": 3.058103975535168, "eval_loss": 0.24130229651927948, "eval_runtime": 15.7231, "eval_samples_per_second": 10.176, "eval_steps_per_second": 2.544, "eval_wer": 0.2932330827067669, "step": 1000 }, { "epoch": 3.363914373088685, "grad_norm": 10.20130729675293, "learning_rate": 0.00026922760041194643, "loss": 0.3626, "step": 1100 }, { "epoch": 3.363914373088685, "eval_loss": 0.23597605526447296, "eval_runtime": 15.6773, "eval_samples_per_second": 10.206, "eval_steps_per_second": 2.551, "eval_wer": 0.3082706766917293, "step": 1100 }, { "epoch": 3.669724770642202, "grad_norm": 0.775827944278717, "learning_rate": 0.00026613800205973223, "loss": 0.3312, "step": 1200 }, { "epoch": 3.669724770642202, "eval_loss": 0.2335551679134369, "eval_runtime": 15.7243, "eval_samples_per_second": 10.175, "eval_steps_per_second": 2.544, "eval_wer": 0.29974937343358393, "step": 1200 }, { "epoch": 3.9755351681957185, "grad_norm": 1.4778733253479004, "learning_rate": 0.00026307929969104015, "loss": 0.3322, "step": 1300 }, { "epoch": 3.9755351681957185, "eval_loss": 0.22845473885536194, "eval_runtime": 15.7244, "eval_samples_per_second": 10.175, "eval_steps_per_second": 2.544, "eval_wer": 0.2967418546365915, "step": 1300 }, { "epoch": 4.281345565749236, "grad_norm": 63.480228424072266, "learning_rate": 0.0002599897013388259, "loss": 0.3654, "step": 1400 }, { "epoch": 4.281345565749236, "eval_loss": 0.22345833480358124, "eval_runtime": 15.723, "eval_samples_per_second": 10.176, "eval_steps_per_second": 2.544, "eval_wer": 0.28521303258145364, "step": 1400 }, { "epoch": 4.587155963302752, "grad_norm": 0.6586478352546692, "learning_rate": 0.0002569001029866117, "loss": 0.3241, "step": 1500 }, { "epoch": 4.587155963302752, "eval_loss": 0.21982760727405548, "eval_runtime": 15.6138, "eval_samples_per_second": 10.247, "eval_steps_per_second": 2.562, "eval_wer": 0.2807017543859649, "step": 1500 }, { "epoch": 4.892966360856269, "grad_norm": 1.3852250576019287, "learning_rate": 0.0002538105046343975, "loss": 0.2908, "step": 1600 }, { "epoch": 4.892966360856269, "eval_loss": 0.21671359241008759, "eval_runtime": 15.6441, "eval_samples_per_second": 10.228, "eval_steps_per_second": 2.557, "eval_wer": 0.27669172932330827, "step": 1600 }, { "epoch": 5.198776758409786, "grad_norm": 1.687046766281128, "learning_rate": 0.0002507209062821833, "loss": 0.3299, "step": 1700 }, { "epoch": 5.198776758409786, "eval_loss": 0.21697847545146942, "eval_runtime": 15.6568, "eval_samples_per_second": 10.219, "eval_steps_per_second": 2.555, "eval_wer": 0.2746867167919799, "step": 1700 }, { "epoch": 5.504587155963303, "grad_norm": 1.117961049079895, "learning_rate": 0.00024763130792996907, "loss": 0.3128, "step": 1800 }, { "epoch": 5.504587155963303, "eval_loss": 0.21468114852905273, "eval_runtime": 15.7336, "eval_samples_per_second": 10.169, "eval_steps_per_second": 2.542, "eval_wer": 0.268671679197995, "step": 1800 }, { "epoch": 5.81039755351682, "grad_norm": 0.5044408440589905, "learning_rate": 0.0002445417095777549, "loss": 0.3094, "step": 1900 }, { "epoch": 5.81039755351682, "eval_loss": 0.21403858065605164, "eval_runtime": 15.6158, "eval_samples_per_second": 10.246, "eval_steps_per_second": 2.562, "eval_wer": 0.27418546365914787, "step": 1900 }, { "epoch": 6.116207951070336, "grad_norm": 0.6591036319732666, "learning_rate": 0.00024145211122554065, "loss": 0.309, "step": 2000 }, { "epoch": 6.116207951070336, "eval_loss": 0.21581442654132843, "eval_runtime": 15.7498, "eval_samples_per_second": 10.159, "eval_steps_per_second": 2.54, "eval_wer": 0.27017543859649124, "step": 2000 }, { "epoch": 6.422018348623853, "grad_norm": 1.306911826133728, "learning_rate": 0.00023836251287332646, "loss": 0.3075, "step": 2100 }, { "epoch": 6.422018348623853, "eval_loss": 0.21267759799957275, "eval_runtime": 15.7059, "eval_samples_per_second": 10.187, "eval_steps_per_second": 2.547, "eval_wer": 0.26516290726817043, "step": 2100 }, { "epoch": 6.72782874617737, "grad_norm": 4.904253005981445, "learning_rate": 0.00023527291452111223, "loss": 0.2823, "step": 2200 }, { "epoch": 6.72782874617737, "eval_loss": 0.2155403196811676, "eval_runtime": 15.7469, "eval_samples_per_second": 10.161, "eval_steps_per_second": 2.54, "eval_wer": 0.2671679197994987, "step": 2200 }, { "epoch": 7.033639143730887, "grad_norm": 1.5606273412704468, "learning_rate": 0.00023221421215242018, "loss": 0.3062, "step": 2300 }, { "epoch": 7.033639143730887, "eval_loss": 0.21326474845409393, "eval_runtime": 15.6903, "eval_samples_per_second": 10.197, "eval_steps_per_second": 2.549, "eval_wer": 0.2556390977443609, "step": 2300 }, { "epoch": 7.339449541284404, "grad_norm": 1.3775653839111328, "learning_rate": 0.00022912461380020596, "loss": 0.3012, "step": 2400 }, { "epoch": 7.339449541284404, "eval_loss": 0.2165716141462326, "eval_runtime": 15.6703, "eval_samples_per_second": 10.21, "eval_steps_per_second": 2.553, "eval_wer": 0.25413533834586466, "step": 2400 }, { "epoch": 7.339449541284404, "step": 2400, "total_flos": 9.832669015298243e+18, "train_loss": 0.6780771311124166, "train_runtime": 2242.1481, "train_samples_per_second": 17.461, "train_steps_per_second": 4.375 } ], "logging_steps": 100, "max_steps": 9810, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 400, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.832669015298243e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }