|
{ |
|
"best_metric": 0.21267759799957275, |
|
"best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-nyagen-female-model/checkpoint-2100", |
|
"epoch": 7.339449541284404, |
|
"eval_steps": 100, |
|
"global_step": 2400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.3058103975535168, |
|
"grad_norm": 2.4750263690948486, |
|
"learning_rate": 0.00028799999999999995, |
|
"loss": 7.8679, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3058103975535168, |
|
"eval_loss": 0.9127748608589172, |
|
"eval_runtime": 15.6762, |
|
"eval_samples_per_second": 10.207, |
|
"eval_steps_per_second": 2.552, |
|
"eval_wer": 0.6897243107769424, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6116207951070336, |
|
"grad_norm": 1.507306694984436, |
|
"learning_rate": 0.00029703398558187435, |
|
"loss": 0.627, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.6116207951070336, |
|
"eval_loss": 0.3264550268650055, |
|
"eval_runtime": 15.9193, |
|
"eval_samples_per_second": 10.051, |
|
"eval_steps_per_second": 2.513, |
|
"eval_wer": 0.43859649122807015, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9174311926605505, |
|
"grad_norm": 1.2747225761413574, |
|
"learning_rate": 0.0002939443872296601, |
|
"loss": 0.5004, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.9174311926605505, |
|
"eval_loss": 0.29508110880851746, |
|
"eval_runtime": 15.668, |
|
"eval_samples_per_second": 10.212, |
|
"eval_steps_per_second": 2.553, |
|
"eval_wer": 0.40401002506265665, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.2232415902140672, |
|
"grad_norm": 1.1947221755981445, |
|
"learning_rate": 0.0002908547888774459, |
|
"loss": 0.4611, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.2232415902140672, |
|
"eval_loss": 0.27579107880592346, |
|
"eval_runtime": 15.5786, |
|
"eval_samples_per_second": 10.271, |
|
"eval_steps_per_second": 2.568, |
|
"eval_wer": 0.3659147869674185, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.529051987767584, |
|
"grad_norm": 0.9117503762245178, |
|
"learning_rate": 0.0002877651905252317, |
|
"loss": 0.4455, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.529051987767584, |
|
"eval_loss": 0.266660213470459, |
|
"eval_runtime": 15.6734, |
|
"eval_samples_per_second": 10.208, |
|
"eval_steps_per_second": 2.552, |
|
"eval_wer": 0.3598997493734336, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.834862385321101, |
|
"grad_norm": 0.6930361390113831, |
|
"learning_rate": 0.0002846755921730175, |
|
"loss": 0.4142, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.834862385321101, |
|
"eval_loss": 0.26014643907546997, |
|
"eval_runtime": 15.6292, |
|
"eval_samples_per_second": 10.237, |
|
"eval_steps_per_second": 2.559, |
|
"eval_wer": 0.35137844611528823, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.140672782874618, |
|
"grad_norm": 1.517812967300415, |
|
"learning_rate": 0.00028158599382080326, |
|
"loss": 0.3706, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.140672782874618, |
|
"eval_loss": 0.25211504101753235, |
|
"eval_runtime": 15.5542, |
|
"eval_samples_per_second": 10.287, |
|
"eval_steps_per_second": 2.572, |
|
"eval_wer": 0.34786967418546366, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.4464831804281344, |
|
"grad_norm": 0.9740973114967346, |
|
"learning_rate": 0.00027849639546858907, |
|
"loss": 0.3867, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.4464831804281344, |
|
"eval_loss": 0.24611802399158478, |
|
"eval_runtime": 15.6894, |
|
"eval_samples_per_second": 10.198, |
|
"eval_steps_per_second": 2.549, |
|
"eval_wer": 0.31278195488721805, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.7522935779816513, |
|
"grad_norm": 2.278139114379883, |
|
"learning_rate": 0.0002754067971163748, |
|
"loss": 0.3537, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.7522935779816513, |
|
"eval_loss": 0.24492569267749786, |
|
"eval_runtime": 15.6703, |
|
"eval_samples_per_second": 10.21, |
|
"eval_steps_per_second": 2.553, |
|
"eval_wer": 0.3157894736842105, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.058103975535168, |
|
"grad_norm": 0.7251365780830383, |
|
"learning_rate": 0.0002723171987641606, |
|
"loss": 0.3821, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.058103975535168, |
|
"eval_loss": 0.24130229651927948, |
|
"eval_runtime": 15.7231, |
|
"eval_samples_per_second": 10.176, |
|
"eval_steps_per_second": 2.544, |
|
"eval_wer": 0.2932330827067669, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.363914373088685, |
|
"grad_norm": 10.20130729675293, |
|
"learning_rate": 0.00026922760041194643, |
|
"loss": 0.3626, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.363914373088685, |
|
"eval_loss": 0.23597605526447296, |
|
"eval_runtime": 15.6773, |
|
"eval_samples_per_second": 10.206, |
|
"eval_steps_per_second": 2.551, |
|
"eval_wer": 0.3082706766917293, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.669724770642202, |
|
"grad_norm": 0.775827944278717, |
|
"learning_rate": 0.00026613800205973223, |
|
"loss": 0.3312, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.669724770642202, |
|
"eval_loss": 0.2335551679134369, |
|
"eval_runtime": 15.7243, |
|
"eval_samples_per_second": 10.175, |
|
"eval_steps_per_second": 2.544, |
|
"eval_wer": 0.29974937343358393, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.9755351681957185, |
|
"grad_norm": 1.4778733253479004, |
|
"learning_rate": 0.00026307929969104015, |
|
"loss": 0.3322, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.9755351681957185, |
|
"eval_loss": 0.22845473885536194, |
|
"eval_runtime": 15.7244, |
|
"eval_samples_per_second": 10.175, |
|
"eval_steps_per_second": 2.544, |
|
"eval_wer": 0.2967418546365915, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.281345565749236, |
|
"grad_norm": 63.480228424072266, |
|
"learning_rate": 0.0002599897013388259, |
|
"loss": 0.3654, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.281345565749236, |
|
"eval_loss": 0.22345833480358124, |
|
"eval_runtime": 15.723, |
|
"eval_samples_per_second": 10.176, |
|
"eval_steps_per_second": 2.544, |
|
"eval_wer": 0.28521303258145364, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.587155963302752, |
|
"grad_norm": 0.6586478352546692, |
|
"learning_rate": 0.0002569001029866117, |
|
"loss": 0.3241, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.587155963302752, |
|
"eval_loss": 0.21982760727405548, |
|
"eval_runtime": 15.6138, |
|
"eval_samples_per_second": 10.247, |
|
"eval_steps_per_second": 2.562, |
|
"eval_wer": 0.2807017543859649, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.892966360856269, |
|
"grad_norm": 1.3852250576019287, |
|
"learning_rate": 0.0002538105046343975, |
|
"loss": 0.2908, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.892966360856269, |
|
"eval_loss": 0.21671359241008759, |
|
"eval_runtime": 15.6441, |
|
"eval_samples_per_second": 10.228, |
|
"eval_steps_per_second": 2.557, |
|
"eval_wer": 0.27669172932330827, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.198776758409786, |
|
"grad_norm": 1.687046766281128, |
|
"learning_rate": 0.0002507209062821833, |
|
"loss": 0.3299, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 5.198776758409786, |
|
"eval_loss": 0.21697847545146942, |
|
"eval_runtime": 15.6568, |
|
"eval_samples_per_second": 10.219, |
|
"eval_steps_per_second": 2.555, |
|
"eval_wer": 0.2746867167919799, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 5.504587155963303, |
|
"grad_norm": 1.117961049079895, |
|
"learning_rate": 0.00024763130792996907, |
|
"loss": 0.3128, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.504587155963303, |
|
"eval_loss": 0.21468114852905273, |
|
"eval_runtime": 15.7336, |
|
"eval_samples_per_second": 10.169, |
|
"eval_steps_per_second": 2.542, |
|
"eval_wer": 0.268671679197995, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.81039755351682, |
|
"grad_norm": 0.5044408440589905, |
|
"learning_rate": 0.0002445417095777549, |
|
"loss": 0.3094, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.81039755351682, |
|
"eval_loss": 0.21403858065605164, |
|
"eval_runtime": 15.6158, |
|
"eval_samples_per_second": 10.246, |
|
"eval_steps_per_second": 2.562, |
|
"eval_wer": 0.27418546365914787, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 6.116207951070336, |
|
"grad_norm": 0.6591036319732666, |
|
"learning_rate": 0.00024145211122554065, |
|
"loss": 0.309, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.116207951070336, |
|
"eval_loss": 0.21581442654132843, |
|
"eval_runtime": 15.7498, |
|
"eval_samples_per_second": 10.159, |
|
"eval_steps_per_second": 2.54, |
|
"eval_wer": 0.27017543859649124, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.422018348623853, |
|
"grad_norm": 1.306911826133728, |
|
"learning_rate": 0.00023836251287332646, |
|
"loss": 0.3075, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 6.422018348623853, |
|
"eval_loss": 0.21267759799957275, |
|
"eval_runtime": 15.7059, |
|
"eval_samples_per_second": 10.187, |
|
"eval_steps_per_second": 2.547, |
|
"eval_wer": 0.26516290726817043, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 6.72782874617737, |
|
"grad_norm": 4.904253005981445, |
|
"learning_rate": 0.00023527291452111223, |
|
"loss": 0.2823, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.72782874617737, |
|
"eval_loss": 0.2155403196811676, |
|
"eval_runtime": 15.7469, |
|
"eval_samples_per_second": 10.161, |
|
"eval_steps_per_second": 2.54, |
|
"eval_wer": 0.2671679197994987, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 7.033639143730887, |
|
"grad_norm": 1.5606273412704468, |
|
"learning_rate": 0.00023221421215242018, |
|
"loss": 0.3062, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 7.033639143730887, |
|
"eval_loss": 0.21326474845409393, |
|
"eval_runtime": 15.6903, |
|
"eval_samples_per_second": 10.197, |
|
"eval_steps_per_second": 2.549, |
|
"eval_wer": 0.2556390977443609, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 7.339449541284404, |
|
"grad_norm": 1.3775653839111328, |
|
"learning_rate": 0.00022912461380020596, |
|
"loss": 0.3012, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 7.339449541284404, |
|
"eval_loss": 0.2165716141462326, |
|
"eval_runtime": 15.6703, |
|
"eval_samples_per_second": 10.21, |
|
"eval_steps_per_second": 2.553, |
|
"eval_wer": 0.25413533834586466, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 7.339449541284404, |
|
"step": 2400, |
|
"total_flos": 9.832669015298243e+18, |
|
"train_loss": 0.6780771311124166, |
|
"train_runtime": 2242.1481, |
|
"train_samples_per_second": 17.461, |
|
"train_steps_per_second": 4.375 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 9810, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 3 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.832669015298243e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|