|
{ |
|
"best_metric": 0.04373383894562721, |
|
"best_model_checkpoint": "w2v-bert-final-v2/checkpoint-10000", |
|
"epoch": 7.716049382716049, |
|
"eval_steps": 1000, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.23148148148148148, |
|
"grad_norm": 7.229776859283447, |
|
"learning_rate": 7.425e-06, |
|
"loss": 3.4047, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.46296296296296297, |
|
"grad_norm": 11.000567436218262, |
|
"learning_rate": 1.4925e-05, |
|
"loss": 0.8648, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6944444444444444, |
|
"grad_norm": 11.643492698669434, |
|
"learning_rate": 2.2400000000000002e-05, |
|
"loss": 0.6263, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7716049382716049, |
|
"eval_loss": 0.5938597321510315, |
|
"eval_runtime": 49.2872, |
|
"eval_samples_per_second": 38.408, |
|
"eval_steps_per_second": 4.809, |
|
"eval_wer": 0.509299543180335, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.9259259259259259, |
|
"grad_norm": 16.209674835205078, |
|
"learning_rate": 2.9900000000000002e-05, |
|
"loss": 0.5512, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.1574074074074074, |
|
"grad_norm": 2.7776882648468018, |
|
"learning_rate": 3.74e-05, |
|
"loss": 0.4844, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.3888888888888888, |
|
"grad_norm": 5.179060459136963, |
|
"learning_rate": 4.4875e-05, |
|
"loss": 0.4641, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.5432098765432098, |
|
"eval_loss": 0.5058629512786865, |
|
"eval_runtime": 49.9367, |
|
"eval_samples_per_second": 37.908, |
|
"eval_steps_per_second": 4.746, |
|
"eval_wer": 0.45061996954535566, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.6203703703703702, |
|
"grad_norm": 2.7534990310668945, |
|
"learning_rate": 4.943236137667304e-05, |
|
"loss": 0.4629, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.8518518518518519, |
|
"grad_norm": 3.1153225898742676, |
|
"learning_rate": 4.763981835564054e-05, |
|
"loss": 0.4393, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.0833333333333335, |
|
"grad_norm": 2.560495376586914, |
|
"learning_rate": 4.584727533460803e-05, |
|
"loss": 0.3782, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.314814814814815, |
|
"grad_norm": 2.7176785469055176, |
|
"learning_rate": 4.4054732313575525e-05, |
|
"loss": 0.3054, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.314814814814815, |
|
"eval_loss": 0.34695935249328613, |
|
"eval_runtime": 49.4937, |
|
"eval_samples_per_second": 38.247, |
|
"eval_steps_per_second": 4.788, |
|
"eval_wer": 0.3308135740700457, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.5462962962962963, |
|
"grad_norm": 2.319594383239746, |
|
"learning_rate": 4.226816443594647e-05, |
|
"loss": 0.3079, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.7777777777777777, |
|
"grad_norm": 2.165224552154541, |
|
"learning_rate": 4.047562141491396e-05, |
|
"loss": 0.301, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 3.009259259259259, |
|
"grad_norm": 3.609168529510498, |
|
"learning_rate": 3.8683078393881456e-05, |
|
"loss": 0.2837, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 3.0864197530864197, |
|
"eval_loss": 0.2659013867378235, |
|
"eval_runtime": 50.1986, |
|
"eval_samples_per_second": 37.71, |
|
"eval_steps_per_second": 4.721, |
|
"eval_wer": 0.2583206438981945, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.240740740740741, |
|
"grad_norm": 1.6436126232147217, |
|
"learning_rate": 3.689053537284895e-05, |
|
"loss": 0.2191, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 3.4722222222222223, |
|
"grad_norm": 1.704137921333313, |
|
"learning_rate": 3.5103967495219884e-05, |
|
"loss": 0.2103, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.7037037037037037, |
|
"grad_norm": 6.373330593109131, |
|
"learning_rate": 3.331142447418738e-05, |
|
"loss": 0.2174, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 3.8580246913580245, |
|
"eval_loss": 0.19009661674499512, |
|
"eval_runtime": 49.6613, |
|
"eval_samples_per_second": 38.118, |
|
"eval_steps_per_second": 4.772, |
|
"eval_wer": 0.195072873613226, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.935185185185185, |
|
"grad_norm": 2.4362969398498535, |
|
"learning_rate": 3.151888145315488e-05, |
|
"loss": 0.1939, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 4.166666666666667, |
|
"grad_norm": 1.4299020767211914, |
|
"learning_rate": 2.9726338432122373e-05, |
|
"loss": 0.1666, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 4.398148148148148, |
|
"grad_norm": 1.3548126220703125, |
|
"learning_rate": 2.793379541108987e-05, |
|
"loss": 0.1569, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 4.62962962962963, |
|
"grad_norm": 1.5247106552124023, |
|
"learning_rate": 2.6141252390057363e-05, |
|
"loss": 0.152, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.62962962962963, |
|
"eval_loss": 0.15165844559669495, |
|
"eval_runtime": 49.8658, |
|
"eval_samples_per_second": 37.962, |
|
"eval_steps_per_second": 4.753, |
|
"eval_wer": 0.1540678703502284, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.861111111111111, |
|
"grad_norm": 3.228041410446167, |
|
"learning_rate": 2.434870936902486e-05, |
|
"loss": 0.1481, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 5.092592592592593, |
|
"grad_norm": 1.5059906244277954, |
|
"learning_rate": 2.2562141491395794e-05, |
|
"loss": 0.1256, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 5.324074074074074, |
|
"grad_norm": 3.538102626800537, |
|
"learning_rate": 2.0769598470363287e-05, |
|
"loss": 0.1089, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 5.401234567901234, |
|
"eval_loss": 0.11081259697675705, |
|
"eval_runtime": 49.8031, |
|
"eval_samples_per_second": 38.01, |
|
"eval_steps_per_second": 4.759, |
|
"eval_wer": 0.11404176636937133, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 5.555555555555555, |
|
"grad_norm": 0.6531468033790588, |
|
"learning_rate": 1.8977055449330787e-05, |
|
"loss": 0.1022, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 5.787037037037037, |
|
"grad_norm": 1.7070139646530151, |
|
"learning_rate": 1.718451242829828e-05, |
|
"loss": 0.1022, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 6.018518518518518, |
|
"grad_norm": 1.612425446510315, |
|
"learning_rate": 1.5397944550669215e-05, |
|
"loss": 0.0986, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 6.172839506172839, |
|
"eval_loss": 0.08366883546113968, |
|
"eval_runtime": 51.7773, |
|
"eval_samples_per_second": 36.56, |
|
"eval_steps_per_second": 4.577, |
|
"eval_wer": 0.08902545138133565, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"grad_norm": 0.7614215612411499, |
|
"learning_rate": 1.3605401529636713e-05, |
|
"loss": 0.0766, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 6.481481481481482, |
|
"grad_norm": 2.542365550994873, |
|
"learning_rate": 1.181883365200765e-05, |
|
"loss": 0.0758, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 6.712962962962963, |
|
"grad_norm": 1.319675326347351, |
|
"learning_rate": 1.0026290630975144e-05, |
|
"loss": 0.0726, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 6.944444444444445, |
|
"grad_norm": 0.8109455704689026, |
|
"learning_rate": 8.233747609942639e-06, |
|
"loss": 0.0648, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 6.944444444444445, |
|
"eval_loss": 0.05808680132031441, |
|
"eval_runtime": 51.2984, |
|
"eval_samples_per_second": 36.902, |
|
"eval_steps_per_second": 4.62, |
|
"eval_wer": 0.06031107243854688, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 7.175925925925926, |
|
"grad_norm": 1.2560392618179321, |
|
"learning_rate": 6.441204588910134e-06, |
|
"loss": 0.051, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 7.407407407407407, |
|
"grad_norm": 3.7403156757354736, |
|
"learning_rate": 4.654636711281071e-06, |
|
"loss": 0.0473, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 7.638888888888889, |
|
"grad_norm": 0.7289965748786926, |
|
"learning_rate": 2.862093690248566e-06, |
|
"loss": 0.0499, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 7.716049382716049, |
|
"eval_loss": 0.04373383894562721, |
|
"eval_runtime": 50.2864, |
|
"eval_samples_per_second": 37.644, |
|
"eval_steps_per_second": 4.713, |
|
"eval_wer": 0.04606264955405699, |
|
"step": 10000 |
|
} |
|
], |
|
"logging_steps": 300, |
|
"max_steps": 10368, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 8, |
|
"save_steps": 1000, |
|
"total_flos": 2.075992650724199e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|