{ "best_metric": 0.6599727869033813, "best_model_checkpoint": "/scratch/skscla001/results/xls-r-1b-bem-natbed-combined-model/checkpoint-800", "epoch": 2.7534418022528158, "eval_steps": 100, "global_step": 1100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2503128911138924, "grad_norm": 2.3866429328918457, "learning_rate": 4.9e-05, "loss": 6.1525, "step": 100 }, { "epoch": 0.2503128911138924, "eval_loss": 2.9905781745910645, "eval_runtime": 96.751, "eval_samples_per_second": 14.036, "eval_steps_per_second": 1.757, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.5006257822277848, "grad_norm": 12.731361389160156, "learning_rate": 4.958719460825611e-05, "loss": 2.8477, "step": 200 }, { "epoch": 0.5006257822277848, "eval_loss": 2.3722355365753174, "eval_runtime": 95.7895, "eval_samples_per_second": 14.177, "eval_steps_per_second": 1.775, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.7509386733416771, "grad_norm": 3.254181385040283, "learning_rate": 4.916596461668071e-05, "loss": 1.3216, "step": 300 }, { "epoch": 0.7509386733416771, "eval_loss": 0.8614773154258728, "eval_runtime": 95.7487, "eval_samples_per_second": 14.183, "eval_steps_per_second": 1.775, "eval_wer": 0.8819921491658489, "step": 300 }, { "epoch": 1.0012515644555695, "grad_norm": 4.778253555297852, "learning_rate": 4.874473462510531e-05, "loss": 0.8219, "step": 400 }, { "epoch": 1.0012515644555695, "eval_loss": 0.8088704347610474, "eval_runtime": 95.7716, "eval_samples_per_second": 14.18, "eval_steps_per_second": 1.775, "eval_wer": 0.8601570166830226, "step": 400 }, { "epoch": 1.2515644555694618, "grad_norm": 1.714836597442627, "learning_rate": 4.832350463352991e-05, "loss": 0.718, "step": 500 }, { "epoch": 1.2515644555694618, "eval_loss": 0.7154927253723145, "eval_runtime": 96.02, "eval_samples_per_second": 14.143, "eval_steps_per_second": 1.77, "eval_wer": 0.7755152109911678, "step": 500 }, { "epoch": 1.5018773466833542, "grad_norm": 3.5530471801757812, "learning_rate": 4.7902274641954506e-05, "loss": 0.6985, "step": 600 }, { "epoch": 1.5018773466833542, "eval_loss": 0.7211880087852478, "eval_runtime": 95.9087, "eval_samples_per_second": 14.159, "eval_steps_per_second": 1.773, "eval_wer": 0.6726365718024206, "step": 600 }, { "epoch": 1.7521902377972465, "grad_norm": 3.236358642578125, "learning_rate": 4.7481044650379106e-05, "loss": 0.669, "step": 700 }, { "epoch": 1.7521902377972465, "eval_loss": 0.6837976574897766, "eval_runtime": 96.2232, "eval_samples_per_second": 14.113, "eval_steps_per_second": 1.767, "eval_wer": 0.7244030094864246, "step": 700 }, { "epoch": 2.002503128911139, "grad_norm": 1.7046451568603516, "learning_rate": 4.7059814658803705e-05, "loss": 0.6842, "step": 800 }, { "epoch": 2.002503128911139, "eval_loss": 0.6599727869033813, "eval_runtime": 96.1095, "eval_samples_per_second": 14.13, "eval_steps_per_second": 1.769, "eval_wer": 0.7042852469741576, "step": 800 }, { "epoch": 2.252816020025031, "grad_norm": 7.314039707183838, "learning_rate": 4.663858466722831e-05, "loss": 0.5908, "step": 900 }, { "epoch": 2.252816020025031, "eval_loss": 0.821216881275177, "eval_runtime": 96.0491, "eval_samples_per_second": 14.139, "eval_steps_per_second": 1.77, "eval_wer": 0.862855740922473, "step": 900 }, { "epoch": 2.5031289111389237, "grad_norm": 0.7544125318527222, "learning_rate": 4.621735467565291e-05, "loss": 0.5575, "step": 1000 }, { "epoch": 2.5031289111389237, "eval_loss": 0.6882767081260681, "eval_runtime": 95.8261, "eval_samples_per_second": 14.172, "eval_steps_per_second": 1.774, "eval_wer": 0.7400228982662741, "step": 1000 }, { "epoch": 2.7534418022528158, "grad_norm": 5.456251621246338, "learning_rate": 4.579612468407751e-05, "loss": 0.583, "step": 1100 }, { "epoch": 2.7534418022528158, "eval_loss": 0.7891210913658142, "eval_runtime": 96.0541, "eval_samples_per_second": 14.138, "eval_steps_per_second": 1.77, "eval_wer": 0.8344782466470396, "step": 1100 }, { "epoch": 2.7534418022528158, "step": 1100, "total_flos": 1.1254487729160804e+19, "train_loss": 1.4222582556984642, "train_runtime": 3256.3096, "train_samples_per_second": 58.843, "train_steps_per_second": 3.676 } ], "logging_steps": 100, "max_steps": 11970, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 2 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.1254487729160804e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }