{ "best_metric": 0.20998027920722961, "best_model_checkpoint": "/scratch/skscla001/results/mms-zeroshot-300m-genbed-f-model/checkpoint-5600", "epoch": 16.986301369863014, "eval_steps": 200, "global_step": 6200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.547945205479452, "eval_loss": 2.3236358165740967, "eval_runtime": 58.3404, "eval_samples_per_second": 16.627, "eval_steps_per_second": 2.091, "eval_wer": 1.0, "step": 200 }, { "epoch": 1.095890410958904, "eval_loss": 0.3330934941768646, "eval_runtime": 57.5798, "eval_samples_per_second": 16.846, "eval_steps_per_second": 2.119, "eval_wer": 0.5504350628424106, "step": 400 }, { "epoch": 1.36986301369863, "grad_norm": 0.44414466619491577, "learning_rate": 0.00028902304147465435, "loss": 2.6731, "step": 500 }, { "epoch": 1.643835616438356, "eval_loss": 0.29691705107688904, "eval_runtime": 57.9342, "eval_samples_per_second": 16.743, "eval_steps_per_second": 2.106, "eval_wer": 0.5189601460951767, "step": 600 }, { "epoch": 2.191780821917808, "eval_loss": 0.2805863320827484, "eval_runtime": 57.8326, "eval_samples_per_second": 16.773, "eval_steps_per_second": 2.11, "eval_wer": 0.5121925018799012, "step": 800 }, { "epoch": 2.73972602739726, "grad_norm": 0.2989409565925598, "learning_rate": 0.00027519815668202764, "loss": 0.4193, "step": 1000 }, { "epoch": 2.73972602739726, "eval_loss": 0.2701254189014435, "eval_runtime": 57.9519, "eval_samples_per_second": 16.738, "eval_steps_per_second": 2.105, "eval_wer": 0.4741647867654958, "step": 1000 }, { "epoch": 3.287671232876712, "eval_loss": 0.27029839158058167, "eval_runtime": 57.8498, "eval_samples_per_second": 16.768, "eval_steps_per_second": 2.109, "eval_wer": 0.47695778279084755, "step": 1200 }, { "epoch": 3.8356164383561646, "eval_loss": 0.2574402093887329, "eval_runtime": 57.9028, "eval_samples_per_second": 16.752, "eval_steps_per_second": 2.107, "eval_wer": 0.47577613062627566, "step": 1400 }, { "epoch": 4.109589041095891, "grad_norm": 0.5375602841377258, "learning_rate": 0.00026137327188940093, "loss": 0.367, "step": 1500 }, { "epoch": 4.383561643835616, "eval_loss": 0.24872656166553497, "eval_runtime": 57.6765, "eval_samples_per_second": 16.818, "eval_steps_per_second": 2.115, "eval_wer": 0.4547212375120851, "step": 1600 }, { "epoch": 4.931506849315069, "eval_loss": 0.247171550989151, "eval_runtime": 58.1732, "eval_samples_per_second": 16.674, "eval_steps_per_second": 2.097, "eval_wer": 0.43366634439789453, "step": 1800 }, { "epoch": 5.47945205479452, "grad_norm": 0.536637544631958, "learning_rate": 0.00024754838709677417, "loss": 0.3377, "step": 2000 }, { "epoch": 5.47945205479452, "eval_loss": 0.24240365624427795, "eval_runtime": 58.1149, "eval_samples_per_second": 16.691, "eval_steps_per_second": 2.099, "eval_wer": 0.44666451820818565, "step": 2000 }, { "epoch": 6.027397260273973, "eval_loss": 0.23721589148044586, "eval_runtime": 57.9941, "eval_samples_per_second": 16.726, "eval_steps_per_second": 2.104, "eval_wer": 0.42743581480287896, "step": 2200 }, { "epoch": 6.575342465753424, "eval_loss": 0.23664695024490356, "eval_runtime": 58.3196, "eval_samples_per_second": 16.632, "eval_steps_per_second": 2.092, "eval_wer": 0.4224943602964873, "step": 2400 }, { "epoch": 6.8493150684931505, "grad_norm": 0.45388591289520264, "learning_rate": 0.00023372350230414744, "loss": 0.3282, "step": 2500 }, { "epoch": 7.123287671232877, "eval_loss": 0.23390649259090424, "eval_runtime": 58.129, "eval_samples_per_second": 16.687, "eval_steps_per_second": 2.099, "eval_wer": 0.4103555698786121, "step": 2600 }, { "epoch": 7.671232876712329, "eval_loss": 0.23517899215221405, "eval_runtime": 57.9143, "eval_samples_per_second": 16.749, "eval_steps_per_second": 2.107, "eval_wer": 0.4192716725749275, "step": 2800 }, { "epoch": 8.219178082191782, "grad_norm": 0.6077154278755188, "learning_rate": 0.0002198986175115207, "loss": 0.3018, "step": 3000 }, { "epoch": 8.219178082191782, "eval_loss": 0.2249327301979065, "eval_runtime": 57.7536, "eval_samples_per_second": 16.795, "eval_steps_per_second": 2.112, "eval_wer": 0.4097110323343001, "step": 3000 }, { "epoch": 8.767123287671232, "eval_loss": 0.22541025280952454, "eval_runtime": 58.012, "eval_samples_per_second": 16.721, "eval_steps_per_second": 2.103, "eval_wer": 0.40648834461274036, "step": 3200 }, { "epoch": 9.315068493150685, "eval_loss": 0.2250933200120926, "eval_runtime": 58.2195, "eval_samples_per_second": 16.661, "eval_steps_per_second": 2.096, "eval_wer": 0.40208400472660866, "step": 3400 }, { "epoch": 9.58904109589041, "grad_norm": 0.45964235067367554, "learning_rate": 0.00020607373271889397, "loss": 0.2945, "step": 3500 }, { "epoch": 9.863013698630137, "eval_loss": 0.22482535243034363, "eval_runtime": 57.7885, "eval_samples_per_second": 16.785, "eval_steps_per_second": 2.111, "eval_wer": 0.396927704372113, "step": 3600 }, { "epoch": 10.41095890410959, "eval_loss": 0.22121191024780273, "eval_runtime": 58.297, "eval_samples_per_second": 16.639, "eval_steps_per_second": 2.093, "eval_wer": 0.4001503920936728, "step": 3800 }, { "epoch": 10.95890410958904, "grad_norm": 1.2428817749023438, "learning_rate": 0.0001922488479262673, "loss": 0.2843, "step": 4000 }, { "epoch": 10.95890410958904, "eval_loss": 0.21997873485088348, "eval_runtime": 57.8696, "eval_samples_per_second": 16.762, "eval_steps_per_second": 2.108, "eval_wer": 0.39198624986572134, "step": 4000 }, { "epoch": 11.506849315068493, "eval_loss": 0.21829502284526825, "eval_runtime": 57.8135, "eval_samples_per_second": 16.778, "eval_steps_per_second": 2.11, "eval_wer": 0.3853260285744978, "step": 4200 }, { "epoch": 12.054794520547945, "eval_loss": 0.21739539504051208, "eval_runtime": 57.004, "eval_samples_per_second": 17.016, "eval_steps_per_second": 2.14, "eval_wer": 0.3889784079922656, "step": 4400 }, { "epoch": 12.32876712328767, "grad_norm": 0.44710573554039, "learning_rate": 0.00017842396313364056, "loss": 0.2755, "step": 4500 }, { "epoch": 12.602739726027398, "eval_loss": 0.21631112694740295, "eval_runtime": 57.2582, "eval_samples_per_second": 16.941, "eval_steps_per_second": 2.131, "eval_wer": 0.3955312063594371, "step": 4600 }, { "epoch": 13.150684931506849, "eval_loss": 0.21974815428256989, "eval_runtime": 57.1085, "eval_samples_per_second": 16.985, "eval_steps_per_second": 2.136, "eval_wer": 0.3894080996884735, "step": 4800 }, { "epoch": 13.698630136986301, "grad_norm": 0.3474729061126709, "learning_rate": 0.00016459907834101383, "loss": 0.2699, "step": 5000 }, { "epoch": 13.698630136986301, "eval_loss": 0.21634985506534576, "eval_runtime": 57.267, "eval_samples_per_second": 16.938, "eval_steps_per_second": 2.13, "eval_wer": 0.38994521430873347, "step": 5000 }, { "epoch": 14.246575342465754, "eval_loss": 0.21288762986660004, "eval_runtime": 56.9225, "eval_samples_per_second": 17.041, "eval_steps_per_second": 2.143, "eval_wer": 0.37694704049844235, "step": 5200 }, { "epoch": 14.794520547945206, "eval_loss": 0.2114371657371521, "eval_runtime": 57.0542, "eval_samples_per_second": 17.001, "eval_steps_per_second": 2.138, "eval_wer": 0.37587281125792243, "step": 5400 }, { "epoch": 15.068493150684931, "grad_norm": 0.5010984539985657, "learning_rate": 0.0001507741935483871, "loss": 0.2568, "step": 5500 }, { "epoch": 15.342465753424657, "eval_loss": 0.20998027920722961, "eval_runtime": 56.9906, "eval_samples_per_second": 17.02, "eval_steps_per_second": 2.141, "eval_wer": 0.3721130089161027, "step": 5600 }, { "epoch": 15.89041095890411, "eval_loss": 0.2139931619167328, "eval_runtime": 56.9487, "eval_samples_per_second": 17.033, "eval_steps_per_second": 2.142, "eval_wer": 0.36695670856160706, "step": 5800 }, { "epoch": 16.438356164383563, "grad_norm": 0.3882729411125183, "learning_rate": 0.00013694930875576036, "loss": 0.2521, "step": 6000 }, { "epoch": 16.438356164383563, "eval_loss": 0.21490569412708282, "eval_runtime": 57.0453, "eval_samples_per_second": 17.004, "eval_steps_per_second": 2.139, "eval_wer": 0.37426146739714256, "step": 6000 }, { "epoch": 16.986301369863014, "eval_loss": 0.21307234466075897, "eval_runtime": 57.0514, "eval_samples_per_second": 17.002, "eval_steps_per_second": 2.138, "eval_wer": 0.3720055859920507, "step": 6200 }, { "epoch": 16.986301369863014, "step": 6200, "total_flos": 9.649821343406506e+18, "train_loss": 0.4968759044524162, "train_runtime": 6151.3654, "train_samples_per_second": 14.226, "train_steps_per_second": 1.78 } ], "logging_steps": 500, "max_steps": 10950, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.649821343406506e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }