|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 15.0,
  "eval_steps": 100,
  "global_step": 5640,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.26595744680851063,
      "eval_loss": 17.797901153564453,
      "eval_runtime": 208.2309,
      "eval_samples_per_second": 23.825,
      "eval_steps_per_second": 2.982,
      "eval_wer": 1.0,
      "step": 100
    },
    {
      "epoch": 0.5319148936170213,
      "eval_loss": 5.018890380859375,
      "eval_runtime": 206.5228,
      "eval_samples_per_second": 24.022,
      "eval_steps_per_second": 3.007,
      "eval_wer": 1.0,
      "step": 200
    },
    {
      "epoch": 0.7978723404255319,
      "eval_loss": 4.124919891357422,
      "eval_runtime": 206.6954,
      "eval_samples_per_second": 24.001,
      "eval_steps_per_second": 3.004,
      "eval_wer": 1.0,
      "step": 300
    },
    {
      "epoch": 1.0638297872340425,
      "eval_loss": 4.171339511871338,
      "eval_runtime": 206.9498,
      "eval_samples_per_second": 23.972,
      "eval_steps_per_second": 3.001,
      "eval_wer": 1.0,
      "step": 400
    },
    {
      "epoch": 1.3297872340425532,
      "grad_norm": 2.336822509765625,
      "learning_rate": 0.00029699999999999996,
      "loss": 9.6222,
      "step": 500
    },
    {
      "epoch": 1.3297872340425532,
      "eval_loss": 4.171350479125977,
      "eval_runtime": 206.1998,
      "eval_samples_per_second": 24.059,
      "eval_steps_per_second": 3.012,
      "eval_wer": 1.0,
      "step": 500
    },
    {
      "epoch": 1.5957446808510638,
      "eval_loss": 4.168086051940918,
      "eval_runtime": 206.1519,
      "eval_samples_per_second": 24.065,
      "eval_steps_per_second": 3.012,
      "eval_wer": 1.0,
      "step": 600
    },
    {
      "epoch": 1.8617021276595744,
      "eval_loss": 4.133743762969971,
      "eval_runtime": 206.5072,
      "eval_samples_per_second": 24.023,
      "eval_steps_per_second": 3.007,
      "eval_wer": 1.0,
      "step": 700
    },
    {
      "epoch": 2.127659574468085,
      "eval_loss": 4.110419273376465,
      "eval_runtime": 206.2896,
      "eval_samples_per_second": 24.049,
      "eval_steps_per_second": 3.01,
      "eval_wer": 1.0,
      "step": 800
    },
    {
      "epoch": 2.393617021276596,
      "eval_loss": 4.1108503341674805,
      "eval_runtime": 206.9777,
      "eval_samples_per_second": 23.969,
      "eval_steps_per_second": 3.0,
      "eval_wer": 1.0,
      "step": 900
    },
    {
      "epoch": 2.6595744680851063,
      "grad_norm": 1.0139914751052856,
      "learning_rate": 0.0002711089494163424,
      "loss": 4.1243,
      "step": 1000
    },
    {
      "epoch": 2.6595744680851063,
      "eval_loss": 4.111221790313721,
      "eval_runtime": 207.2581,
      "eval_samples_per_second": 23.936,
      "eval_steps_per_second": 2.996,
      "eval_wer": 1.0,
      "step": 1000
    },
    {
      "epoch": 2.925531914893617,
      "eval_loss": 4.116605758666992,
      "eval_runtime": 206.7383,
      "eval_samples_per_second": 23.997,
      "eval_steps_per_second": 3.004,
      "eval_wer": 1.0,
      "step": 1100
    },
    {
      "epoch": 3.1914893617021276,
      "eval_loss": 4.143283367156982,
      "eval_runtime": 207.4325,
      "eval_samples_per_second": 23.916,
      "eval_steps_per_second": 2.994,
      "eval_wer": 1.0,
      "step": 1200
    },
    {
      "epoch": 3.4574468085106385,
      "eval_loss": 4.188790321350098,
      "eval_runtime": 206.5637,
      "eval_samples_per_second": 24.017,
      "eval_steps_per_second": 3.006,
      "eval_wer": 1.0,
      "step": 1300
    },
    {
      "epoch": 3.723404255319149,
      "eval_loss": 4.2255859375,
      "eval_runtime": 206.5284,
      "eval_samples_per_second": 24.021,
      "eval_steps_per_second": 3.007,
      "eval_wer": 1.0,
      "step": 1400
    },
    {
      "epoch": 3.9893617021276597,
      "grad_norm": 2.125878095626831,
      "learning_rate": 0.0002419260700389105,
      "loss": 4.1253,
      "step": 1500
    },
    {
      "epoch": 3.9893617021276597,
      "eval_loss": 4.14175271987915,
      "eval_runtime": 206.1478,
      "eval_samples_per_second": 24.065,
      "eval_steps_per_second": 3.012,
      "eval_wer": 1.0,
      "step": 1500
    },
    {
      "epoch": 4.25531914893617,
      "eval_loss": 4.113326549530029,
      "eval_runtime": 206.8791,
      "eval_samples_per_second": 23.98,
      "eval_steps_per_second": 3.002,
      "eval_wer": 1.0,
      "step": 1600
    },
    {
      "epoch": 4.5212765957446805,
      "eval_loss": 4.112410545349121,
      "eval_runtime": 207.4656,
      "eval_samples_per_second": 23.912,
      "eval_steps_per_second": 2.993,
      "eval_wer": 1.0,
      "step": 1700
    },
    {
      "epoch": 4.787234042553192,
      "eval_loss": 4.113221645355225,
      "eval_runtime": 206.6651,
      "eval_samples_per_second": 24.005,
      "eval_steps_per_second": 3.005,
      "eval_wer": 1.0,
      "step": 1800
    },
    {
      "epoch": 5.053191489361702,
      "eval_loss": 4.19992208480835,
      "eval_runtime": 206.9983,
      "eval_samples_per_second": 23.966,
      "eval_steps_per_second": 3.0,
      "eval_wer": 1.0,
      "step": 1900
    },
    {
      "epoch": 5.319148936170213,
      "grad_norm": 1.172359585762024,
      "learning_rate": 0.0002127431906614786,
      "loss": 4.1182,
      "step": 2000
    },
    {
      "epoch": 5.319148936170213,
      "eval_loss": 4.179160118103027,
      "eval_runtime": 207.0024,
      "eval_samples_per_second": 23.966,
      "eval_steps_per_second": 3.0,
      "eval_wer": 1.0,
      "step": 2000
    },
    {
      "epoch": 5.585106382978723,
      "eval_loss": 4.211320400238037,
      "eval_runtime": 208.18,
      "eval_samples_per_second": 23.83,
      "eval_steps_per_second": 2.983,
      "eval_wer": 1.0,
      "step": 2100
    },
    {
      "epoch": 5.851063829787234,
      "eval_loss": 4.20604133605957,
      "eval_runtime": 207.2193,
      "eval_samples_per_second": 23.941,
      "eval_steps_per_second": 2.997,
      "eval_wer": 1.0,
      "step": 2200
    },
    {
      "epoch": 6.117021276595745,
      "eval_loss": 4.110590934753418,
      "eval_runtime": 208.0433,
      "eval_samples_per_second": 23.846,
      "eval_steps_per_second": 2.985,
      "eval_wer": 1.0,
      "step": 2300
    },
    {
      "epoch": 6.382978723404255,
      "eval_loss": 4.112478256225586,
      "eval_runtime": 207.5552,
      "eval_samples_per_second": 23.902,
      "eval_steps_per_second": 2.992,
      "eval_wer": 1.0,
      "step": 2400
    },
    {
      "epoch": 6.648936170212766,
      "grad_norm": 1.6996749639511108,
      "learning_rate": 0.0001835603112840467,
      "loss": 4.1207,
      "step": 2500
    },
    {
      "epoch": 6.648936170212766,
      "eval_loss": 4.113041877746582,
      "eval_runtime": 207.168,
      "eval_samples_per_second": 23.947,
      "eval_steps_per_second": 2.998,
      "eval_wer": 1.0,
      "step": 2500
    },
    {
      "epoch": 6.914893617021277,
      "eval_loss": 4.108416557312012,
      "eval_runtime": 207.1242,
      "eval_samples_per_second": 23.952,
      "eval_steps_per_second": 2.998,
      "eval_wer": 1.0,
      "step": 2600
    },
    {
      "epoch": 7.180851063829787,
      "eval_loss": 4.170561790466309,
      "eval_runtime": 210.6386,
      "eval_samples_per_second": 23.552,
      "eval_steps_per_second": 2.948,
      "eval_wer": 1.0,
      "step": 2700
    },
    {
      "epoch": 7.446808510638298,
      "eval_loss": 4.179252624511719,
      "eval_runtime": 207.7058,
      "eval_samples_per_second": 23.885,
      "eval_steps_per_second": 2.99,
      "eval_wer": 1.0,
      "step": 2800
    },
    {
      "epoch": 7.712765957446808,
      "eval_loss": 4.18436336517334,
      "eval_runtime": 207.7366,
      "eval_samples_per_second": 23.881,
      "eval_steps_per_second": 2.989,
      "eval_wer": 1.0,
      "step": 2900
    },
    {
      "epoch": 7.9787234042553195,
      "grad_norm": 3.874195098876953,
      "learning_rate": 0.0001544941634241245,
      "loss": 4.3809,
      "step": 3000
    },
    {
      "epoch": 7.9787234042553195,
      "eval_loss": 4.143243789672852,
      "eval_runtime": 208.1415,
      "eval_samples_per_second": 23.835,
      "eval_steps_per_second": 2.984,
      "eval_wer": 1.0,
      "step": 3000
    },
    {
      "epoch": 8.24468085106383,
      "eval_loss": 4.108314514160156,
      "eval_runtime": 208.2271,
      "eval_samples_per_second": 23.825,
      "eval_steps_per_second": 2.982,
      "eval_wer": 1.0,
      "step": 3100
    },
    {
      "epoch": 8.51063829787234,
      "eval_loss": 4.117116928100586,
      "eval_runtime": 208.0587,
      "eval_samples_per_second": 23.844,
      "eval_steps_per_second": 2.985,
      "eval_wer": 1.0,
      "step": 3200
    },
    {
      "epoch": 8.77659574468085,
      "eval_loss": 4.11330509185791,
      "eval_runtime": 208.1596,
      "eval_samples_per_second": 23.833,
      "eval_steps_per_second": 2.983,
      "eval_wer": 1.0,
      "step": 3300
    },
    {
      "epoch": 9.042553191489361,
      "eval_loss": 4.163766384124756,
      "eval_runtime": 208.0242,
      "eval_samples_per_second": 23.848,
      "eval_steps_per_second": 2.985,
      "eval_wer": 1.0,
      "step": 3400
    },
    {
      "epoch": 9.308510638297872,
      "grad_norm": 2.0024287700653076,
      "learning_rate": 0.0001253112840466926,
      "loss": 4.1164,
      "step": 3500
    },
    {
      "epoch": 9.308510638297872,
      "eval_loss": 4.130943298339844,
      "eval_runtime": 208.5062,
      "eval_samples_per_second": 23.793,
      "eval_steps_per_second": 2.978,
      "eval_wer": 1.0,
      "step": 3500
    },
    {
      "epoch": 9.574468085106384,
      "eval_loss": 4.1129841804504395,
      "eval_runtime": 207.7504,
      "eval_samples_per_second": 23.88,
      "eval_steps_per_second": 2.989,
      "eval_wer": 1.0,
      "step": 3600
    },
    {
      "epoch": 9.840425531914894,
      "eval_loss": 4.127650737762451,
      "eval_runtime": 208.5195,
      "eval_samples_per_second": 23.792,
      "eval_steps_per_second": 2.978,
      "eval_wer": 1.0,
      "step": 3700
    },
    {
      "epoch": 10.106382978723405,
      "eval_loss": 4.1156158447265625,
      "eval_runtime": 208.3668,
      "eval_samples_per_second": 23.809,
      "eval_steps_per_second": 2.98,
      "eval_wer": 1.0,
      "step": 3800
    },
    {
      "epoch": 10.372340425531915,
      "eval_loss": 4.1260600090026855,
      "eval_runtime": 208.8761,
      "eval_samples_per_second": 23.751,
      "eval_steps_per_second": 2.973,
      "eval_wer": 1.0,
      "step": 3900
    },
    {
      "epoch": 10.638297872340425,
      "grad_norm": 1.807287573814392,
      "learning_rate": 9.61284046692607e-05,
      "loss": 4.1195,
      "step": 4000
    },
    {
      "epoch": 10.638297872340425,
      "eval_loss": 4.130354404449463,
      "eval_runtime": 208.2427,
      "eval_samples_per_second": 23.823,
      "eval_steps_per_second": 2.982,
      "eval_wer": 1.0,
      "step": 4000
    },
    {
      "epoch": 10.904255319148936,
      "eval_loss": 4.128208637237549,
      "eval_runtime": 208.9946,
      "eval_samples_per_second": 23.737,
      "eval_steps_per_second": 2.971,
      "eval_wer": 1.0,
      "step": 4100
    },
    {
      "epoch": 11.170212765957446,
      "eval_loss": 4.11016845703125,
      "eval_runtime": 214.217,
      "eval_samples_per_second": 23.159,
      "eval_steps_per_second": 2.899,
      "eval_wer": 1.0,
      "step": 4200
    },
    {
      "epoch": 11.436170212765958,
      "eval_loss": 4.111141681671143,
      "eval_runtime": 208.356,
      "eval_samples_per_second": 23.81,
      "eval_steps_per_second": 2.98,
      "eval_wer": 1.0,
      "step": 4300
    },
    {
      "epoch": 11.702127659574469,
      "eval_loss": 4.111063003540039,
      "eval_runtime": 209.2862,
      "eval_samples_per_second": 23.704,
      "eval_steps_per_second": 2.967,
      "eval_wer": 1.0,
      "step": 4400
    },
    {
      "epoch": 11.96808510638298,
      "grad_norm": 2.2066569328308105,
      "learning_rate": 6.69455252918288e-05,
      "loss": 4.1164,
      "step": 4500
    },
    {
      "epoch": 11.96808510638298,
      "eval_loss": 4.112732410430908,
      "eval_runtime": 213.9347,
      "eval_samples_per_second": 23.189,
      "eval_steps_per_second": 2.903,
      "eval_wer": 1.0,
      "step": 4500
    },
    {
      "epoch": 12.23404255319149,
      "eval_loss": 4.141692638397217,
      "eval_runtime": 214.7394,
      "eval_samples_per_second": 23.102,
      "eval_steps_per_second": 2.892,
      "eval_wer": 1.0,
      "step": 4600
    },
    {
      "epoch": 12.5,
      "eval_loss": 4.139012336730957,
      "eval_runtime": 223.6932,
      "eval_samples_per_second": 22.178,
      "eval_steps_per_second": 2.776,
      "eval_wer": 1.0,
      "step": 4700
    },
    {
      "epoch": 12.76595744680851,
      "eval_loss": 4.143847942352295,
      "eval_runtime": 207.9672,
      "eval_samples_per_second": 23.855,
      "eval_steps_per_second": 2.986,
      "eval_wer": 1.0,
      "step": 4800
    },
    {
      "epoch": 13.03191489361702,
      "eval_loss": 4.122192859649658,
      "eval_runtime": 208.5333,
      "eval_samples_per_second": 23.79,
      "eval_steps_per_second": 2.978,
      "eval_wer": 1.0,
      "step": 4900
    },
    {
      "epoch": 13.297872340425531,
      "grad_norm": 0.3912961781024933,
      "learning_rate": 3.776264591439688e-05,
      "loss": 4.1117,
      "step": 5000
    },
    {
      "epoch": 13.297872340425531,
      "eval_loss": 4.115208625793457,
      "eval_runtime": 208.8419,
      "eval_samples_per_second": 23.755,
      "eval_steps_per_second": 2.974,
      "eval_wer": 1.0,
      "step": 5000
    },
    {
      "epoch": 13.563829787234042,
      "eval_loss": 4.124714374542236,
      "eval_runtime": 208.9077,
      "eval_samples_per_second": 23.747,
      "eval_steps_per_second": 2.973,
      "eval_wer": 1.0,
      "step": 5100
    },
    {
      "epoch": 13.829787234042554,
      "eval_loss": 4.124392032623291,
      "eval_runtime": 208.9383,
      "eval_samples_per_second": 23.744,
      "eval_steps_per_second": 2.972,
      "eval_wer": 1.0,
      "step": 5200
    },
    {
      "epoch": 14.095744680851064,
      "eval_loss": 4.138058662414551,
      "eval_runtime": 208.8904,
      "eval_samples_per_second": 23.749,
      "eval_steps_per_second": 2.973,
      "eval_wer": 1.0,
      "step": 5300
    },
    {
      "epoch": 14.361702127659575,
      "eval_loss": 4.13184928894043,
      "eval_runtime": 209.1322,
      "eval_samples_per_second": 23.722,
      "eval_steps_per_second": 2.969,
      "eval_wer": 1.0,
      "step": 5400
    },
    {
      "epoch": 14.627659574468085,
      "grad_norm": 1.6473333835601807,
      "learning_rate": 8.579766536964979e-06,
      "loss": 4.1079,
      "step": 5500
    },
    {
      "epoch": 14.627659574468085,
      "eval_loss": 4.131294250488281,
      "eval_runtime": 209.206,
      "eval_samples_per_second": 23.713,
      "eval_steps_per_second": 2.968,
      "eval_wer": 1.0,
      "step": 5500
    },
    {
      "epoch": 14.893617021276595,
      "eval_loss": 4.130970478057861,
      "eval_runtime": 209.3258,
      "eval_samples_per_second": 23.7,
      "eval_steps_per_second": 2.967,
      "eval_wer": 1.0,
      "step": 5600
    },
    {
      "epoch": 15.0,
      "step": 5640,
      "total_flos": 7.889727743335047e+18,
      "train_loss": 4.629210873867603,
      "train_runtime": 29472.0498,
      "train_samples_per_second": 6.124,
      "train_steps_per_second": 0.191
    }
  ],
  "logging_steps": 500,
  "max_steps": 5640,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 15,
  "save_steps": 400,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7.889727743335047e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}
|
|