{ "best_metric": 0.31921109557151794, "best_model_checkpoint": "/scratch/skscla001/speech/results/xls-r-1b-bemgen-male-model/checkpoint-1100", "epoch": 5.396396396396397, "eval_steps": 100, "global_step": 1500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.36036036036036034, "eval_loss": 3.5018131732940674, "eval_runtime": 20.1337, "eval_samples_per_second": 13.212, "eval_steps_per_second": 3.328, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.7207207207207207, "eval_loss": 2.861574172973633, "eval_runtime": 19.7549, "eval_samples_per_second": 13.465, "eval_steps_per_second": 3.392, "eval_wer": 1.0, "step": 200 }, { "epoch": 1.0792792792792794, "eval_loss": 1.1005113124847412, "eval_runtime": 19.8914, "eval_samples_per_second": 13.373, "eval_steps_per_second": 3.368, "eval_wer": 0.9874489200233508, "step": 300 }, { "epoch": 1.4396396396396396, "eval_loss": 0.6284775137901306, "eval_runtime": 19.8994, "eval_samples_per_second": 13.367, "eval_steps_per_second": 3.367, "eval_wer": 0.835960303561004, "step": 400 }, { "epoch": 1.8, "grad_norm": 4.3291192054748535, "learning_rate": 2.976e-05, "loss": 5.4329, "step": 500 }, { "epoch": 1.8, "eval_loss": 0.5177034139633179, "eval_runtime": 20.1072, "eval_samples_per_second": 13.229, "eval_steps_per_second": 3.332, "eval_wer": 0.7589025102159953, "step": 500 }, { "epoch": 2.1585585585585587, "eval_loss": 0.4120151698589325, "eval_runtime": 19.9558, "eval_samples_per_second": 13.329, "eval_steps_per_second": 3.357, "eval_wer": 0.5989492119089317, "step": 600 }, { "epoch": 2.518918918918919, "eval_loss": 0.3997887372970581, "eval_runtime": 19.8674, "eval_samples_per_second": 13.389, "eval_steps_per_second": 3.372, "eval_wer": 0.549620548744892, "step": 700 }, { "epoch": 2.879279279279279, "eval_loss": 0.37145501375198364, "eval_runtime": 19.9372, "eval_samples_per_second": 13.342, "eval_steps_per_second": 3.361, "eval_wer": 0.5653823701109165, "step": 800 }, { "epoch": 3.237837837837838, "eval_loss": 0.335114449262619, "eval_runtime": 20.0684, "eval_samples_per_second": 13.255, "eval_steps_per_second": 3.339, "eval_wer": 0.4871570344424985, "step": 900 }, { "epoch": 3.5981981981981983, "grad_norm": 2.873544692993164, "learning_rate": 2.8094750320102435e-05, "loss": 0.644, "step": 1000 }, { "epoch": 3.5981981981981983, "eval_loss": 0.33343327045440674, "eval_runtime": 19.9311, "eval_samples_per_second": 13.346, "eval_steps_per_second": 3.362, "eval_wer": 0.5014594279042616, "step": 1000 }, { "epoch": 3.9585585585585585, "eval_loss": 0.31921109557151794, "eval_runtime": 19.8773, "eval_samples_per_second": 13.382, "eval_steps_per_second": 3.371, "eval_wer": 0.4795680093403386, "step": 1100 }, { "epoch": 4.317117117117117, "eval_loss": 0.3245755434036255, "eval_runtime": 19.8844, "eval_samples_per_second": 13.377, "eval_steps_per_second": 3.369, "eval_wer": 0.45942790426152946, "step": 1200 }, { "epoch": 4.677477477477478, "eval_loss": 0.3242240250110626, "eval_runtime": 19.9276, "eval_samples_per_second": 13.348, "eval_steps_per_second": 3.362, "eval_wer": 0.46117921774664333, "step": 1300 }, { "epoch": 5.036036036036036, "eval_loss": 0.3202609419822693, "eval_runtime": 20.0808, "eval_samples_per_second": 13.246, "eval_steps_per_second": 3.337, "eval_wer": 0.44424985405720957, "step": 1400 }, { "epoch": 5.396396396396397, "grad_norm": 1.6697975397109985, "learning_rate": 2.61741357234315e-05, "loss": 0.3852, "step": 1500 }, { "epoch": 5.396396396396397, "eval_loss": 0.3452622890472412, "eval_runtime": 19.9999, "eval_samples_per_second": 13.3, "eval_steps_per_second": 3.35, "eval_wer": 0.44162288382953885, "step": 1500 }, { "epoch": 5.396396396396397, "step": 1500, "total_flos": 9.503204377194754e+18, "train_loss": 2.1540360310872395, "train_runtime": 2027.2043, "train_samples_per_second": 32.809, "train_steps_per_second": 4.099 } ], "logging_steps": 500, "max_steps": 8310, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 400, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 4, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 1 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.503204377194754e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }