{ "best_metric": 0.258684903383255, "best_model_checkpoint": "/scratch/skscla001/speech/results/mms-1b-bemgen-combined-model/checkpoint-2200", "epoch": 1.2893243940175347, "eval_steps": 100, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05157297576070139, "grad_norm": 3.681674003601074, "learning_rate": 0.00028799999999999995, "loss": 6.7553, "step": 100 }, { "epoch": 0.05157297576070139, "eval_loss": 0.8774181604385376, "eval_runtime": 57.8286, "eval_samples_per_second": 16.877, "eval_steps_per_second": 2.11, "eval_wer": 0.847627340008707, "step": 100 }, { "epoch": 0.10314595152140278, "grad_norm": 2.3965742588043213, "learning_rate": 0.00029950404684002066, "loss": 0.5648, "step": 200 }, { "epoch": 0.10314595152140278, "eval_loss": 0.34078654646873474, "eval_runtime": 57.7359, "eval_samples_per_second": 16.905, "eval_steps_per_second": 2.113, "eval_wer": 0.5031562908141054, "step": 200 }, { "epoch": 0.15471892728210418, "grad_norm": 12.905903816223145, "learning_rate": 0.00029898742896504216, "loss": 0.4827, "step": 300 }, { "epoch": 0.15471892728210418, "eval_loss": 0.3261478543281555, "eval_runtime": 57.6285, "eval_samples_per_second": 16.936, "eval_steps_per_second": 2.117, "eval_wer": 0.4930343926861123, "step": 300 }, { "epoch": 0.20629190304280556, "grad_norm": 1.9552323818206787, "learning_rate": 0.00029847081109006366, "loss": 0.4321, "step": 400 }, { "epoch": 0.20629190304280556, "eval_loss": 0.303559273481369, "eval_runtime": 57.7126, "eval_samples_per_second": 16.911, "eval_steps_per_second": 2.114, "eval_wer": 0.48541575968654765, "step": 400 }, { "epoch": 0.25786487880350695, "grad_norm": 2.4878430366516113, "learning_rate": 0.0002979541932150852, "loss": 0.4168, "step": 500 }, { "epoch": 0.25786487880350695, "eval_loss": 0.2988507151603699, "eval_runtime": 57.9159, "eval_samples_per_second": 16.852, "eval_steps_per_second": 2.107, "eval_wer": 0.4783413147583805, "step": 500 }, { "epoch": 0.30943785456420836, "grad_norm": 3.2817885875701904, "learning_rate": 0.0002974375753401067, "loss": 0.3965, "step": 600 }, { "epoch": 0.30943785456420836, "eval_loss": 0.2906985282897949, "eval_runtime": 57.5951, "eval_samples_per_second": 16.946, "eval_steps_per_second": 2.118, "eval_wer": 0.45134958641706574, "step": 600 }, { "epoch": 0.36101083032490977, "grad_norm": 1.416237235069275, "learning_rate": 0.0002969209574651283, "loss": 0.4199, "step": 700 }, { "epoch": 0.36101083032490977, "eval_loss": 0.29259440302848816, "eval_runtime": 57.5412, "eval_samples_per_second": 16.962, "eval_steps_per_second": 2.12, "eval_wer": 0.4718110579016108, "step": 700 }, { "epoch": 0.4125838060856111, "grad_norm": 2.3475992679595947, "learning_rate": 0.0002964043395901498, "loss": 0.3975, "step": 800 }, { "epoch": 0.4125838060856111, "eval_loss": 0.288595974445343, "eval_runtime": 57.3013, "eval_samples_per_second": 17.033, "eval_steps_per_second": 2.129, "eval_wer": 0.44590770570309096, "step": 800 }, { "epoch": 0.46415678184631254, "grad_norm": 1.5925949811935425, "learning_rate": 0.00029588772171517134, "loss": 0.3839, "step": 900 }, { "epoch": 0.46415678184631254, "eval_loss": 0.29075172543525696, "eval_runtime": 57.8589, "eval_samples_per_second": 16.869, "eval_steps_per_second": 2.109, "eval_wer": 0.4722464083587288, "step": 900 }, { "epoch": 0.5157297576070139, "grad_norm": 2.456458568572998, "learning_rate": 0.00029537110384019285, "loss": 0.3673, "step": 1000 }, { "epoch": 0.5157297576070139, "eval_loss": 0.2836114764213562, "eval_runtime": 57.7048, "eval_samples_per_second": 16.914, "eval_steps_per_second": 2.114, "eval_wer": 0.44449281671745755, "step": 1000 }, { "epoch": 0.5673027333677153, "grad_norm": 2.249444007873535, "learning_rate": 0.00029485448596521435, "loss": 0.3777, "step": 1100 }, { "epoch": 0.5673027333677153, "eval_loss": 0.27841705083847046, "eval_runtime": 57.4013, "eval_samples_per_second": 17.003, "eval_steps_per_second": 2.125, "eval_wer": 0.4365476708750544, "step": 1100 }, { "epoch": 0.6188757091284167, "grad_norm": 1.6211791038513184, "learning_rate": 0.0002943378680902359, "loss": 0.3764, "step": 1200 }, { "epoch": 0.6188757091284167, "eval_loss": 0.2790738344192505, "eval_runtime": 57.3594, "eval_samples_per_second": 17.016, "eval_steps_per_second": 2.127, "eval_wer": 0.4278406617326948, "step": 1200 }, { "epoch": 0.6704486848891181, "grad_norm": 2.047067165374756, "learning_rate": 0.0002938212502152574, "loss": 0.3918, "step": 1300 }, { "epoch": 0.6704486848891181, "eval_loss": 0.27573078870773315, "eval_runtime": 57.7956, "eval_samples_per_second": 16.887, "eval_steps_per_second": 2.111, "eval_wer": 0.42511972137570747, "step": 1300 }, { "epoch": 0.7220216606498195, "grad_norm": 1.4619171619415283, "learning_rate": 0.00029330979851902874, "loss": 0.3669, "step": 1400 }, { "epoch": 0.7220216606498195, "eval_loss": 0.2721499502658844, "eval_runtime": 57.7153, "eval_samples_per_second": 16.911, "eval_steps_per_second": 2.114, "eval_wer": 0.41815411406181974, "step": 1400 }, { "epoch": 0.7735946364105208, "grad_norm": 1.6276806592941284, "learning_rate": 0.0002927931806440503, "loss": 0.377, "step": 1500 }, { "epoch": 0.7735946364105208, "eval_loss": 0.27284711599349976, "eval_runtime": 57.3961, "eval_samples_per_second": 17.005, "eval_steps_per_second": 2.126, "eval_wer": 0.47572921201567264, "step": 1500 }, { "epoch": 0.8251676121712223, "grad_norm": 3.681490659713745, "learning_rate": 0.0002922765627690718, "loss": 0.4174, "step": 1600 }, { "epoch": 0.8251676121712223, "eval_loss": 0.2684435546398163, "eval_runtime": 57.3852, "eval_samples_per_second": 17.008, "eval_steps_per_second": 2.126, "eval_wer": 0.4242490204614715, "step": 1600 }, { "epoch": 0.8767405879319237, "grad_norm": 1.2958589792251587, "learning_rate": 0.0002917599448940933, "loss": 0.3641, "step": 1700 }, { "epoch": 0.8767405879319237, "eval_loss": 0.2649287283420563, "eval_runtime": 57.8551, "eval_samples_per_second": 16.87, "eval_steps_per_second": 2.109, "eval_wer": 0.4194601654331737, "step": 1700 }, { "epoch": 0.9283135636926251, "grad_norm": 1.8431603908538818, "learning_rate": 0.00029124332701911486, "loss": 0.3882, "step": 1800 }, { "epoch": 0.9283135636926251, "eval_loss": 0.2646693289279938, "eval_runtime": 57.7373, "eval_samples_per_second": 16.904, "eval_steps_per_second": 2.113, "eval_wer": 0.41249455811928604, "step": 1800 }, { "epoch": 0.9798865394533265, "grad_norm": 2.4119603633880615, "learning_rate": 0.00029072670914413636, "loss": 0.3861, "step": 1900 }, { "epoch": 0.9798865394533265, "eval_loss": 0.2668125033378601, "eval_runtime": 57.5111, "eval_samples_per_second": 16.971, "eval_steps_per_second": 2.121, "eval_wer": 0.44253373966042664, "step": 1900 }, { "epoch": 1.0314595152140278, "grad_norm": 1.174663782119751, "learning_rate": 0.00029021009126915787, "loss": 0.3647, "step": 2000 }, { "epoch": 1.0314595152140278, "eval_loss": 0.26750749349594116, "eval_runtime": 57.5411, "eval_samples_per_second": 16.962, "eval_steps_per_second": 2.12, "eval_wer": 0.42457553330430997, "step": 2000 }, { "epoch": 1.0830324909747293, "grad_norm": 0.9301519393920898, "learning_rate": 0.0002896934733941794, "loss": 0.3467, "step": 2100 }, { "epoch": 1.0830324909747293, "eval_loss": 0.2629115581512451, "eval_runtime": 57.7541, "eval_samples_per_second": 16.899, "eval_steps_per_second": 2.112, "eval_wer": 0.40977361776229865, "step": 2100 }, { "epoch": 1.1346054667354306, "grad_norm": 0.7093687057495117, "learning_rate": 0.00028917685551920093, "loss": 0.3579, "step": 2200 }, { "epoch": 1.1346054667354306, "eval_loss": 0.258684903383255, "eval_runtime": 57.7529, "eval_samples_per_second": 16.9, "eval_steps_per_second": 2.112, "eval_wer": 0.41858946451893775, "step": 2200 }, { "epoch": 1.1861784424961321, "grad_norm": 1.027114748954773, "learning_rate": 0.00028866023764422243, "loss": 0.3544, "step": 2300 }, { "epoch": 1.1861784424961321, "eval_loss": 0.2609032392501831, "eval_runtime": 57.5179, "eval_samples_per_second": 16.969, "eval_steps_per_second": 2.121, "eval_wer": 0.412712233347845, "step": 2300 }, { "epoch": 1.2377514182568334, "grad_norm": 0.9472237825393677, "learning_rate": 0.000288143619769244, "loss": 0.35, "step": 2400 }, { "epoch": 1.2377514182568334, "eval_loss": 0.2592073678970337, "eval_runtime": 57.1938, "eval_samples_per_second": 17.065, "eval_steps_per_second": 2.133, "eval_wer": 0.4061819764910753, "step": 2400 }, { "epoch": 1.2893243940175347, "grad_norm": 1.6566214561462402, "learning_rate": 0.00028762700189426555, "loss": 0.3519, "step": 2500 }, { "epoch": 1.2893243940175347, "eval_loss": 0.2590978443622589, "eval_runtime": 57.823, "eval_samples_per_second": 16.879, "eval_steps_per_second": 2.11, "eval_wer": 0.4134740966478015, "step": 2500 }, { "epoch": 1.2893243940175347, "step": 2500, "total_flos": 5.958212987780215e+18, "train_loss": 0.6475233551025391, "train_runtime": 3633.5111, "train_samples_per_second": 64.037, "train_steps_per_second": 16.009 } ], "logging_steps": 100, "max_steps": 58170, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 400, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 2 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.958212987780215e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }