{ "best_metric": 0.24565543234348297, "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-bemgen-combined-model/checkpoint-3900", "epoch": 2.1660649819494586, "eval_steps": 100, "global_step": 4200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05157297576070139, "grad_norm": 2.867751359939575, "learning_rate": 0.000285, "loss": 6.8762, "step": 100 }, { "epoch": 0.05157297576070139, "eval_loss": 0.9800576567649841, "eval_runtime": 61.3726, "eval_samples_per_second": 15.903, "eval_steps_per_second": 3.976, "eval_wer": 0.9386155855463648, "step": 100 }, { "epoch": 0.10314595152140278, "grad_norm": 4.205641269683838, "learning_rate": 0.0002995092130187704, "loss": 0.5788, "step": 200 }, { "epoch": 0.10314595152140278, "eval_loss": 0.34664157032966614, "eval_runtime": 60.8325, "eval_samples_per_second": 16.044, "eval_steps_per_second": 4.011, "eval_wer": 0.5014148889856335, "step": 200 }, { "epoch": 0.15471892728210418, "grad_norm": 16.612009048461914, "learning_rate": 0.00029899259514379193, "loss": 0.4891, "step": 300 }, { "epoch": 0.15471892728210418, "eval_loss": 0.3219561278820038, "eval_runtime": 60.7824, "eval_samples_per_second": 16.057, "eval_steps_per_second": 4.014, "eval_wer": 0.48204179364388333, "step": 300 }, { "epoch": 0.20629190304280556, "grad_norm": 15.71129035949707, "learning_rate": 0.0002984759772688135, "loss": 0.4386, "step": 400 }, { "epoch": 0.20629190304280556, "eval_loss": 0.30709779262542725, "eval_runtime": 60.8186, "eval_samples_per_second": 16.048, "eval_steps_per_second": 4.012, "eval_wer": 0.4801915542011319, "step": 400 }, { "epoch": 0.25786487880350695, "grad_norm": 2.6226158142089844, "learning_rate": 0.000297959359393835, "loss": 0.4272, "step": 500 }, { "epoch": 0.25786487880350695, "eval_loss": 0.30557531118392944, "eval_runtime": 61.7053, "eval_samples_per_second": 15.817, "eval_steps_per_second": 3.954, "eval_wer": 0.49880278624292557, "step": 500 }, { "epoch": 0.30943785456420836, "grad_norm": 2.265719413757324, "learning_rate": 0.00029744274151885655, "loss": 0.3982, "step": 600 }, { "epoch": 0.30943785456420836, "eval_loss": 0.2980726957321167, "eval_runtime": 61.2582, "eval_samples_per_second": 15.933, "eval_steps_per_second": 3.983, "eval_wer": 0.4625598606878537, "step": 600 }, { "epoch": 0.36101083032490977, "grad_norm": 1.4846241474151611, "learning_rate": 0.00029692612364387805, "loss": 0.425, "step": 700 }, { "epoch": 0.36101083032490977, "eval_loss": 0.2976619601249695, "eval_runtime": 61.0566, "eval_samples_per_second": 15.985, "eval_steps_per_second": 3.996, "eval_wer": 0.4631040487592512, "step": 700 }, { "epoch": 0.4125838060856111, "grad_norm": 1.5233323574066162, "learning_rate": 0.0002964095057688996, "loss": 0.4036, "step": 800 }, { "epoch": 0.4125838060856111, "eval_loss": 0.2897385358810425, "eval_runtime": 60.9529, "eval_samples_per_second": 16.012, "eval_steps_per_second": 4.003, "eval_wer": 0.44383979103178056, "step": 800 }, { "epoch": 0.46415678184631254, "grad_norm": 1.4918992519378662, "learning_rate": 0.0002958928878939211, "loss": 0.3903, "step": 900 }, { "epoch": 0.46415678184631254, "eval_loss": 0.28775253891944885, "eval_runtime": 61.5398, "eval_samples_per_second": 15.86, "eval_steps_per_second": 3.965, "eval_wer": 0.4626686983021332, "step": 900 }, { "epoch": 0.5157297576070139, "grad_norm": 35.91661071777344, "learning_rate": 0.0002953762700189426, "loss": 0.3758, "step": 1000 }, { "epoch": 0.5157297576070139, "eval_loss": 0.29262155294418335, "eval_runtime": 61.6256, "eval_samples_per_second": 15.838, "eval_steps_per_second": 3.959, "eval_wer": 0.4523291249455812, "step": 1000 }, { "epoch": 0.5673027333677153, "grad_norm": 2.335728645324707, "learning_rate": 0.0002948596521439642, "loss": 0.3861, "step": 1100 }, { "epoch": 0.5673027333677153, "eval_loss": 0.28073564171791077, "eval_runtime": 60.9884, "eval_samples_per_second": 16.003, "eval_steps_per_second": 4.001, "eval_wer": 0.44101001306051374, "step": 1100 }, { "epoch": 0.6188757091284167, "grad_norm": 1.2785513401031494, "learning_rate": 0.0002943430342689857, "loss": 0.3763, "step": 1200 }, { "epoch": 0.6188757091284167, "eval_loss": 0.2789745032787323, "eval_runtime": 61.0183, "eval_samples_per_second": 15.995, "eval_steps_per_second": 3.999, "eval_wer": 0.4330648672181106, "step": 1200 }, { "epoch": 0.6704486848891181, "grad_norm": 4.705647945404053, "learning_rate": 0.0002938264163940072, "loss": 0.3984, "step": 1300 }, { "epoch": 0.6704486848891181, "eval_loss": 0.2803143262863159, "eval_runtime": 61.7547, "eval_samples_per_second": 15.804, "eval_steps_per_second": 3.951, "eval_wer": 0.4312146277753592, "step": 1300 }, { "epoch": 0.7220216606498195, "grad_norm": 1.780588984489441, "learning_rate": 0.00029330979851902874, "loss": 0.373, "step": 1400 }, { "epoch": 0.7220216606498195, "eval_loss": 0.2802477478981018, "eval_runtime": 61.6178, "eval_samples_per_second": 15.84, "eval_steps_per_second": 3.96, "eval_wer": 0.42457553330430997, "step": 1400 }, { "epoch": 0.7735946364105208, "grad_norm": 2.3556313514709473, "learning_rate": 0.0002927931806440503, "loss": 0.3848, "step": 1500 }, { "epoch": 0.7735946364105208, "eval_loss": 0.2759012281894684, "eval_runtime": 61.2513, "eval_samples_per_second": 15.934, "eval_steps_per_second": 3.984, "eval_wer": 0.47518502394427514, "step": 1500 }, { "epoch": 0.8251676121712223, "grad_norm": 4.117414951324463, "learning_rate": 0.0002922765627690718, "loss": 0.4235, "step": 1600 }, { "epoch": 0.8251676121712223, "eval_loss": 0.2738034725189209, "eval_runtime": 60.9855, "eval_samples_per_second": 16.004, "eval_steps_per_second": 4.001, "eval_wer": 0.42675228558989986, "step": 1600 }, { "epoch": 0.8767405879319237, "grad_norm": 7.5644683837890625, "learning_rate": 0.0002917651110728431, "loss": 0.3704, "step": 1700 }, { "epoch": 0.8767405879319237, "eval_loss": 0.26875266432762146, "eval_runtime": 61.6159, "eval_samples_per_second": 15.84, "eval_steps_per_second": 3.96, "eval_wer": 0.4218545929473226, "step": 1700 }, { "epoch": 0.9283135636926251, "grad_norm": 2.0241034030914307, "learning_rate": 0.00029124849319786463, "loss": 0.3911, "step": 1800 }, { "epoch": 0.9283135636926251, "eval_loss": 0.2653037905693054, "eval_runtime": 61.6914, "eval_samples_per_second": 15.821, "eval_steps_per_second": 3.955, "eval_wer": 0.42011319111885065, "step": 1800 }, { "epoch": 0.9798865394533265, "grad_norm": 1.9250996112823486, "learning_rate": 0.00029073187532288613, "loss": 0.3954, "step": 1900 }, { "epoch": 0.9798865394533265, "eval_loss": 0.26971080899238586, "eval_runtime": 61.0364, "eval_samples_per_second": 15.99, "eval_steps_per_second": 3.998, "eval_wer": 0.4481932956029604, "step": 1900 }, { "epoch": 1.0314595152140278, "grad_norm": 2.152578353881836, "learning_rate": 0.0002902152574479077, "loss": 0.352, "step": 2000 }, { "epoch": 1.0314595152140278, "eval_loss": 0.26541659235954285, "eval_runtime": 61.1446, "eval_samples_per_second": 15.962, "eval_steps_per_second": 3.991, "eval_wer": 0.4154331737048324, "step": 2000 }, { "epoch": 1.0830324909747293, "grad_norm": 1.2193535566329956, "learning_rate": 0.0002896986395729292, "loss": 0.3808, "step": 2100 }, { "epoch": 1.0830324909747293, "eval_loss": 0.2631310522556305, "eval_runtime": 61.7949, "eval_samples_per_second": 15.794, "eval_steps_per_second": 3.949, "eval_wer": 0.40509360034828035, "step": 2100 }, { "epoch": 1.1346054667354306, "grad_norm": 0.9607815742492676, "learning_rate": 0.0002891820216979507, "loss": 0.3681, "step": 2200 }, { "epoch": 1.1346054667354306, "eval_loss": 0.26097217202186584, "eval_runtime": 61.6359, "eval_samples_per_second": 15.835, "eval_steps_per_second": 3.959, "eval_wer": 0.4218545929473226, "step": 2200 }, { "epoch": 1.1861784424961321, "grad_norm": 1.6105040311813354, "learning_rate": 0.00028866540382297226, "loss": 0.3355, "step": 2300 }, { "epoch": 1.1861784424961321, "eval_loss": 0.26081275939941406, "eval_runtime": 61.1026, "eval_samples_per_second": 15.973, "eval_steps_per_second": 3.993, "eval_wer": 0.40977361776229865, "step": 2300 }, { "epoch": 1.2377514182568334, "grad_norm": 2.3081679344177246, "learning_rate": 0.0002881487859479938, "loss": 0.342, "step": 2400 }, { "epoch": 1.2377514182568334, "eval_loss": 0.2601791322231293, "eval_runtime": 61.0993, "eval_samples_per_second": 15.974, "eval_steps_per_second": 3.993, "eval_wer": 0.40824989116238575, "step": 2400 }, { "epoch": 1.2893243940175347, "grad_norm": 0.6918842792510986, "learning_rate": 0.0002876321680730153, "loss": 0.347, "step": 2500 }, { "epoch": 1.2893243940175347, "eval_loss": 0.26280567049980164, "eval_runtime": 61.7023, "eval_samples_per_second": 15.818, "eval_steps_per_second": 3.954, "eval_wer": 0.40552895080539836, "step": 2500 }, { "epoch": 1.3408973697782363, "grad_norm": 0.920050323009491, "learning_rate": 0.0002871155501980368, "loss": 0.3409, "step": 2600 }, { "epoch": 1.3408973697782363, "eval_loss": 0.25879552960395813, "eval_runtime": 61.69, "eval_samples_per_second": 15.821, "eval_steps_per_second": 3.955, "eval_wer": 0.412929908576404, "step": 2600 }, { "epoch": 1.3924703455389376, "grad_norm": 0.7028564810752869, "learning_rate": 0.0002865989323230584, "loss": 0.3423, "step": 2700 }, { "epoch": 1.3924703455389376, "eval_loss": 0.2616526484489441, "eval_runtime": 61.13, "eval_samples_per_second": 15.966, "eval_steps_per_second": 3.991, "eval_wer": 0.41924249020461474, "step": 2700 }, { "epoch": 1.444043321299639, "grad_norm": 3.5141775608062744, "learning_rate": 0.0002860823144480799, "loss": 0.3341, "step": 2800 }, { "epoch": 1.444043321299639, "eval_loss": 0.25779473781585693, "eval_runtime": 61.3161, "eval_samples_per_second": 15.918, "eval_steps_per_second": 3.979, "eval_wer": 0.40552895080539836, "step": 2800 }, { "epoch": 1.4956162970603404, "grad_norm": 1.0055650472640991, "learning_rate": 0.0002855656965731014, "loss": 0.3425, "step": 2900 }, { "epoch": 1.4956162970603404, "eval_loss": 0.2579568922519684, "eval_runtime": 61.9639, "eval_samples_per_second": 15.751, "eval_steps_per_second": 3.938, "eval_wer": 0.39878101872006966, "step": 2900 }, { "epoch": 1.5471892728210417, "grad_norm": 1.4293900728225708, "learning_rate": 0.00028504907869812294, "loss": 0.337, "step": 3000 }, { "epoch": 1.5471892728210417, "eval_loss": 0.25681352615356445, "eval_runtime": 61.6019, "eval_samples_per_second": 15.844, "eval_steps_per_second": 3.961, "eval_wer": 0.40705267740531126, "step": 3000 }, { "epoch": 1.5987622485817432, "grad_norm": 0.6135945916175842, "learning_rate": 0.00028453246082314445, "loss": 0.3412, "step": 3100 }, { "epoch": 1.5987622485817432, "eval_loss": 0.25524020195007324, "eval_runtime": 61.308, "eval_samples_per_second": 15.92, "eval_steps_per_second": 3.98, "eval_wer": 0.39932520679146716, "step": 3100 }, { "epoch": 1.6503352243424445, "grad_norm": 4.757889270782471, "learning_rate": 0.00028401584294816595, "loss": 0.3837, "step": 3200 }, { "epoch": 1.6503352243424445, "eval_loss": 0.26221156120300293, "eval_runtime": 61.5959, "eval_samples_per_second": 15.845, "eval_steps_per_second": 3.961, "eval_wer": 0.40835872877666524, "step": 3200 }, { "epoch": 1.701908200103146, "grad_norm": 1.1227970123291016, "learning_rate": 0.0002834992250731875, "loss": 0.3372, "step": 3300 }, { "epoch": 1.701908200103146, "eval_loss": 0.2548165023326874, "eval_runtime": 61.9858, "eval_samples_per_second": 15.746, "eval_steps_per_second": 3.936, "eval_wer": 0.3991075315629081, "step": 3300 }, { "epoch": 1.7534811758638473, "grad_norm": 0.8613722324371338, "learning_rate": 0.00028298260719820907, "loss": 0.3394, "step": 3400 }, { "epoch": 1.7534811758638473, "eval_loss": 0.2535094916820526, "eval_runtime": 61.5961, "eval_samples_per_second": 15.845, "eval_steps_per_second": 3.961, "eval_wer": 0.4060731388767958, "step": 3400 }, { "epoch": 1.8050541516245486, "grad_norm": 0.5101200938224792, "learning_rate": 0.00028246598932323057, "loss": 0.3542, "step": 3500 }, { "epoch": 1.8050541516245486, "eval_loss": 0.25123441219329834, "eval_runtime": 61.2846, "eval_samples_per_second": 15.926, "eval_steps_per_second": 3.981, "eval_wer": 0.39268611232041795, "step": 3500 }, { "epoch": 1.8566271273852502, "grad_norm": 1.271552324295044, "learning_rate": 0.0002819493714482521, "loss": 0.3368, "step": 3600 }, { "epoch": 1.8566271273852502, "eval_loss": 0.258027583360672, "eval_runtime": 61.4346, "eval_samples_per_second": 15.887, "eval_steps_per_second": 3.972, "eval_wer": 0.4004135829342621, "step": 3600 }, { "epoch": 1.9082001031459517, "grad_norm": 1.8105818033218384, "learning_rate": 0.00028143275357327363, "loss": 0.3807, "step": 3700 }, { "epoch": 1.9082001031459517, "eval_loss": 0.24900555610656738, "eval_runtime": 61.7426, "eval_samples_per_second": 15.808, "eval_steps_per_second": 3.952, "eval_wer": 0.39747496734871574, "step": 3700 }, { "epoch": 1.959773078906653, "grad_norm": 1.8133718967437744, "learning_rate": 0.00028091613569829514, "loss": 0.3454, "step": 3800 }, { "epoch": 1.959773078906653, "eval_loss": 0.2513742446899414, "eval_runtime": 61.6449, "eval_samples_per_second": 15.833, "eval_steps_per_second": 3.958, "eval_wer": 0.40019590770570307, "step": 3800 }, { "epoch": 2.0113460546673543, "grad_norm": 0.7546507120132446, "learning_rate": 0.00028039951782331664, "loss": 0.3456, "step": 3900 }, { "epoch": 2.0113460546673543, "eval_loss": 0.24565543234348297, "eval_runtime": 61.2207, "eval_samples_per_second": 15.942, "eval_steps_per_second": 3.986, "eval_wer": 0.3931214627775359, "step": 3900 }, { "epoch": 2.0629190304280556, "grad_norm": 0.9227738976478577, "learning_rate": 0.0002798828999483382, "loss": 0.3202, "step": 4000 }, { "epoch": 2.0629190304280556, "eval_loss": 0.24660241603851318, "eval_runtime": 61.4456, "eval_samples_per_second": 15.884, "eval_steps_per_second": 3.971, "eval_wer": 0.391597736177623, "step": 4000 }, { "epoch": 2.114492006188757, "grad_norm": 1.0991692543029785, "learning_rate": 0.00027937144825210947, "loss": 0.3233, "step": 4100 }, { "epoch": 2.114492006188757, "eval_loss": 0.2494671791791916, "eval_runtime": 61.9678, "eval_samples_per_second": 15.75, "eval_steps_per_second": 3.938, "eval_wer": 0.39747496734871574, "step": 4100 }, { "epoch": 2.1660649819494586, "grad_norm": 0.6972938776016235, "learning_rate": 0.000278854830377131, "loss": 0.3052, "step": 4200 }, { "epoch": 2.1660649819494586, "eval_loss": 0.247751384973526, "eval_runtime": 61.6785, "eval_samples_per_second": 15.824, "eval_steps_per_second": 3.956, "eval_wer": 0.38985633434915107, "step": 4200 }, { "epoch": 2.1660649819494586, "step": 4200, "total_flos": 1.0016492328632693e+19, "train_loss": 0.5294508952186221, "train_runtime": 6287.6058, "train_samples_per_second": 37.006, "train_steps_per_second": 9.252 } ], "logging_steps": 100, "max_steps": 58170, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 400, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 1 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.0016492328632693e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }