|
{ |
|
"best_metric": 0.24462078511714935, |
|
"best_model_checkpoint": "/scratch/skscla001/speech/results/xls-r-1b-bemgen-combined-model/checkpoint-1500", |
|
"epoch": 3.3871543264942017, |
|
"eval_steps": 100, |
|
"global_step": 1900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1784121320249777, |
|
"eval_loss": 3.441302537918091, |
|
"eval_runtime": 39.9204, |
|
"eval_samples_per_second": 13.552, |
|
"eval_steps_per_second": 3.407, |
|
"eval_wer": 1.0002904865649964, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3568242640499554, |
|
"eval_loss": 2.914942979812622, |
|
"eval_runtime": 39.4152, |
|
"eval_samples_per_second": 13.726, |
|
"eval_steps_per_second": 3.45, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5352363960749331, |
|
"eval_loss": 0.7767665386199951, |
|
"eval_runtime": 39.9772, |
|
"eval_samples_per_second": 13.533, |
|
"eval_steps_per_second": 3.402, |
|
"eval_wer": 0.9234567901234568, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7136485280999108, |
|
"eval_loss": 0.605692446231842, |
|
"eval_runtime": 39.422, |
|
"eval_samples_per_second": 13.723, |
|
"eval_steps_per_second": 3.45, |
|
"eval_wer": 0.904720406681191, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8920606601248885, |
|
"grad_norm": 3.2083628177642822, |
|
"learning_rate": 2.976e-05, |
|
"loss": 5.3372, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8920606601248885, |
|
"eval_loss": 0.43167629837989807, |
|
"eval_runtime": 39.6193, |
|
"eval_samples_per_second": 13.655, |
|
"eval_steps_per_second": 3.433, |
|
"eval_wer": 0.6720406681190995, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0695807314897412, |
|
"eval_loss": 0.3997327983379364, |
|
"eval_runtime": 39.7537, |
|
"eval_samples_per_second": 13.609, |
|
"eval_steps_per_second": 3.421, |
|
"eval_wer": 0.6704429920116195, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.247992863514719, |
|
"eval_loss": 0.3610936999320984, |
|
"eval_runtime": 39.4379, |
|
"eval_samples_per_second": 13.718, |
|
"eval_steps_per_second": 3.448, |
|
"eval_wer": 0.6405228758169934, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.4264049955396967, |
|
"eval_loss": 0.34406763315200806, |
|
"eval_runtime": 39.4181, |
|
"eval_samples_per_second": 13.725, |
|
"eval_steps_per_second": 3.45, |
|
"eval_wer": 0.5603485838779957, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.6048171275646745, |
|
"eval_loss": 0.2945486903190613, |
|
"eval_runtime": 39.8239, |
|
"eval_samples_per_second": 13.585, |
|
"eval_steps_per_second": 3.415, |
|
"eval_wer": 0.49135802469135803, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.783229259589652, |
|
"grad_norm": 3.3745229244232178, |
|
"learning_rate": 2.908711656441718e-05, |
|
"loss": 0.6459, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.783229259589652, |
|
"eval_loss": 0.3041314482688904, |
|
"eval_runtime": 39.339, |
|
"eval_samples_per_second": 13.752, |
|
"eval_steps_per_second": 3.457, |
|
"eval_wer": 0.4923747276688453, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.9616413916146298, |
|
"eval_loss": 0.2805267870426178, |
|
"eval_runtime": 39.2725, |
|
"eval_samples_per_second": 13.776, |
|
"eval_steps_per_second": 3.463, |
|
"eval_wer": 0.46811909949164854, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.1391614629794824, |
|
"eval_loss": 0.2774401605129242, |
|
"eval_runtime": 39.4398, |
|
"eval_samples_per_second": 13.717, |
|
"eval_steps_per_second": 3.448, |
|
"eval_wer": 0.5108206245461148, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.3175735950044603, |
|
"eval_loss": 0.26826903223991394, |
|
"eval_runtime": 39.6616, |
|
"eval_samples_per_second": 13.64, |
|
"eval_steps_per_second": 3.429, |
|
"eval_wer": 0.42541757443718226, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.495985727029438, |
|
"eval_loss": 0.2643994987010956, |
|
"eval_runtime": 39.3737, |
|
"eval_samples_per_second": 13.74, |
|
"eval_steps_per_second": 3.454, |
|
"eval_wer": 0.4381989832970225, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.674397859054416, |
|
"grad_norm": 2.350830554962158, |
|
"learning_rate": 2.816687116564417e-05, |
|
"loss": 0.4599, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.674397859054416, |
|
"eval_loss": 0.24462078511714935, |
|
"eval_runtime": 39.5477, |
|
"eval_samples_per_second": 13.68, |
|
"eval_steps_per_second": 3.439, |
|
"eval_wer": 0.4142338416848221, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.8528099910793934, |
|
"eval_loss": 0.24734348058700562, |
|
"eval_runtime": 39.4462, |
|
"eval_samples_per_second": 13.715, |
|
"eval_steps_per_second": 3.448, |
|
"eval_wer": 0.4117647058823529, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.0303300624442464, |
|
"eval_loss": 0.24915921688079834, |
|
"eval_runtime": 39.9106, |
|
"eval_samples_per_second": 13.555, |
|
"eval_steps_per_second": 3.408, |
|
"eval_wer": 0.396078431372549, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.208742194469224, |
|
"eval_loss": 0.24668627977371216, |
|
"eval_runtime": 39.4647, |
|
"eval_samples_per_second": 13.708, |
|
"eval_steps_per_second": 3.446, |
|
"eval_wer": 0.40697167755991287, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.3871543264942017, |
|
"eval_loss": 0.25092557072639465, |
|
"eval_runtime": 39.3684, |
|
"eval_samples_per_second": 13.742, |
|
"eval_steps_per_second": 3.455, |
|
"eval_wer": 0.39230210602759624, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.3871543264942017, |
|
"step": 1900, |
|
"total_flos": 1.1803978917941375e+19, |
|
"train_loss": 1.775066086618524, |
|
"train_runtime": 3182.4103, |
|
"train_samples_per_second": 42.251, |
|
"train_steps_per_second": 5.279 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 16800, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 400, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 4, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 1 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1803978917941375e+19, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|