Hubert-common_voice-ja-demo / trainer_state.json
utakumi's picture
End of training
10d8292 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.0,
"eval_steps": 100,
"global_step": 5640,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.26595744680851063,
"eval_loss": 17.797901153564453,
"eval_runtime": 208.2309,
"eval_samples_per_second": 23.825,
"eval_steps_per_second": 2.982,
"eval_wer": 1.0,
"step": 100
},
{
"epoch": 0.5319148936170213,
"eval_loss": 5.018890380859375,
"eval_runtime": 206.5228,
"eval_samples_per_second": 24.022,
"eval_steps_per_second": 3.007,
"eval_wer": 1.0,
"step": 200
},
{
"epoch": 0.7978723404255319,
"eval_loss": 4.124919891357422,
"eval_runtime": 206.6954,
"eval_samples_per_second": 24.001,
"eval_steps_per_second": 3.004,
"eval_wer": 1.0,
"step": 300
},
{
"epoch": 1.0638297872340425,
"eval_loss": 4.171339511871338,
"eval_runtime": 206.9498,
"eval_samples_per_second": 23.972,
"eval_steps_per_second": 3.001,
"eval_wer": 1.0,
"step": 400
},
{
"epoch": 1.3297872340425532,
"grad_norm": 2.336822509765625,
"learning_rate": 0.00029699999999999996,
"loss": 9.6222,
"step": 500
},
{
"epoch": 1.3297872340425532,
"eval_loss": 4.171350479125977,
"eval_runtime": 206.1998,
"eval_samples_per_second": 24.059,
"eval_steps_per_second": 3.012,
"eval_wer": 1.0,
"step": 500
},
{
"epoch": 1.5957446808510638,
"eval_loss": 4.168086051940918,
"eval_runtime": 206.1519,
"eval_samples_per_second": 24.065,
"eval_steps_per_second": 3.012,
"eval_wer": 1.0,
"step": 600
},
{
"epoch": 1.8617021276595744,
"eval_loss": 4.133743762969971,
"eval_runtime": 206.5072,
"eval_samples_per_second": 24.023,
"eval_steps_per_second": 3.007,
"eval_wer": 1.0,
"step": 700
},
{
"epoch": 2.127659574468085,
"eval_loss": 4.110419273376465,
"eval_runtime": 206.2896,
"eval_samples_per_second": 24.049,
"eval_steps_per_second": 3.01,
"eval_wer": 1.0,
"step": 800
},
{
"epoch": 2.393617021276596,
"eval_loss": 4.1108503341674805,
"eval_runtime": 206.9777,
"eval_samples_per_second": 23.969,
"eval_steps_per_second": 3.0,
"eval_wer": 1.0,
"step": 900
},
{
"epoch": 2.6595744680851063,
"grad_norm": 1.0139914751052856,
"learning_rate": 0.0002711089494163424,
"loss": 4.1243,
"step": 1000
},
{
"epoch": 2.6595744680851063,
"eval_loss": 4.111221790313721,
"eval_runtime": 207.2581,
"eval_samples_per_second": 23.936,
"eval_steps_per_second": 2.996,
"eval_wer": 1.0,
"step": 1000
},
{
"epoch": 2.925531914893617,
"eval_loss": 4.116605758666992,
"eval_runtime": 206.7383,
"eval_samples_per_second": 23.997,
"eval_steps_per_second": 3.004,
"eval_wer": 1.0,
"step": 1100
},
{
"epoch": 3.1914893617021276,
"eval_loss": 4.143283367156982,
"eval_runtime": 207.4325,
"eval_samples_per_second": 23.916,
"eval_steps_per_second": 2.994,
"eval_wer": 1.0,
"step": 1200
},
{
"epoch": 3.4574468085106385,
"eval_loss": 4.188790321350098,
"eval_runtime": 206.5637,
"eval_samples_per_second": 24.017,
"eval_steps_per_second": 3.006,
"eval_wer": 1.0,
"step": 1300
},
{
"epoch": 3.723404255319149,
"eval_loss": 4.2255859375,
"eval_runtime": 206.5284,
"eval_samples_per_second": 24.021,
"eval_steps_per_second": 3.007,
"eval_wer": 1.0,
"step": 1400
},
{
"epoch": 3.9893617021276597,
"grad_norm": 2.125878095626831,
"learning_rate": 0.0002419260700389105,
"loss": 4.1253,
"step": 1500
},
{
"epoch": 3.9893617021276597,
"eval_loss": 4.14175271987915,
"eval_runtime": 206.1478,
"eval_samples_per_second": 24.065,
"eval_steps_per_second": 3.012,
"eval_wer": 1.0,
"step": 1500
},
{
"epoch": 4.25531914893617,
"eval_loss": 4.113326549530029,
"eval_runtime": 206.8791,
"eval_samples_per_second": 23.98,
"eval_steps_per_second": 3.002,
"eval_wer": 1.0,
"step": 1600
},
{
"epoch": 4.5212765957446805,
"eval_loss": 4.112410545349121,
"eval_runtime": 207.4656,
"eval_samples_per_second": 23.912,
"eval_steps_per_second": 2.993,
"eval_wer": 1.0,
"step": 1700
},
{
"epoch": 4.787234042553192,
"eval_loss": 4.113221645355225,
"eval_runtime": 206.6651,
"eval_samples_per_second": 24.005,
"eval_steps_per_second": 3.005,
"eval_wer": 1.0,
"step": 1800
},
{
"epoch": 5.053191489361702,
"eval_loss": 4.19992208480835,
"eval_runtime": 206.9983,
"eval_samples_per_second": 23.966,
"eval_steps_per_second": 3.0,
"eval_wer": 1.0,
"step": 1900
},
{
"epoch": 5.319148936170213,
"grad_norm": 1.172359585762024,
"learning_rate": 0.0002127431906614786,
"loss": 4.1182,
"step": 2000
},
{
"epoch": 5.319148936170213,
"eval_loss": 4.179160118103027,
"eval_runtime": 207.0024,
"eval_samples_per_second": 23.966,
"eval_steps_per_second": 3.0,
"eval_wer": 1.0,
"step": 2000
},
{
"epoch": 5.585106382978723,
"eval_loss": 4.211320400238037,
"eval_runtime": 208.18,
"eval_samples_per_second": 23.83,
"eval_steps_per_second": 2.983,
"eval_wer": 1.0,
"step": 2100
},
{
"epoch": 5.851063829787234,
"eval_loss": 4.20604133605957,
"eval_runtime": 207.2193,
"eval_samples_per_second": 23.941,
"eval_steps_per_second": 2.997,
"eval_wer": 1.0,
"step": 2200
},
{
"epoch": 6.117021276595745,
"eval_loss": 4.110590934753418,
"eval_runtime": 208.0433,
"eval_samples_per_second": 23.846,
"eval_steps_per_second": 2.985,
"eval_wer": 1.0,
"step": 2300
},
{
"epoch": 6.382978723404255,
"eval_loss": 4.112478256225586,
"eval_runtime": 207.5552,
"eval_samples_per_second": 23.902,
"eval_steps_per_second": 2.992,
"eval_wer": 1.0,
"step": 2400
},
{
"epoch": 6.648936170212766,
"grad_norm": 1.6996749639511108,
"learning_rate": 0.0001835603112840467,
"loss": 4.1207,
"step": 2500
},
{
"epoch": 6.648936170212766,
"eval_loss": 4.113041877746582,
"eval_runtime": 207.168,
"eval_samples_per_second": 23.947,
"eval_steps_per_second": 2.998,
"eval_wer": 1.0,
"step": 2500
},
{
"epoch": 6.914893617021277,
"eval_loss": 4.108416557312012,
"eval_runtime": 207.1242,
"eval_samples_per_second": 23.952,
"eval_steps_per_second": 2.998,
"eval_wer": 1.0,
"step": 2600
},
{
"epoch": 7.180851063829787,
"eval_loss": 4.170561790466309,
"eval_runtime": 210.6386,
"eval_samples_per_second": 23.552,
"eval_steps_per_second": 2.948,
"eval_wer": 1.0,
"step": 2700
},
{
"epoch": 7.446808510638298,
"eval_loss": 4.179252624511719,
"eval_runtime": 207.7058,
"eval_samples_per_second": 23.885,
"eval_steps_per_second": 2.99,
"eval_wer": 1.0,
"step": 2800
},
{
"epoch": 7.712765957446808,
"eval_loss": 4.18436336517334,
"eval_runtime": 207.7366,
"eval_samples_per_second": 23.881,
"eval_steps_per_second": 2.989,
"eval_wer": 1.0,
"step": 2900
},
{
"epoch": 7.9787234042553195,
"grad_norm": 3.874195098876953,
"learning_rate": 0.0001544941634241245,
"loss": 4.3809,
"step": 3000
},
{
"epoch": 7.9787234042553195,
"eval_loss": 4.143243789672852,
"eval_runtime": 208.1415,
"eval_samples_per_second": 23.835,
"eval_steps_per_second": 2.984,
"eval_wer": 1.0,
"step": 3000
},
{
"epoch": 8.24468085106383,
"eval_loss": 4.108314514160156,
"eval_runtime": 208.2271,
"eval_samples_per_second": 23.825,
"eval_steps_per_second": 2.982,
"eval_wer": 1.0,
"step": 3100
},
{
"epoch": 8.51063829787234,
"eval_loss": 4.117116928100586,
"eval_runtime": 208.0587,
"eval_samples_per_second": 23.844,
"eval_steps_per_second": 2.985,
"eval_wer": 1.0,
"step": 3200
},
{
"epoch": 8.77659574468085,
"eval_loss": 4.11330509185791,
"eval_runtime": 208.1596,
"eval_samples_per_second": 23.833,
"eval_steps_per_second": 2.983,
"eval_wer": 1.0,
"step": 3300
},
{
"epoch": 9.042553191489361,
"eval_loss": 4.163766384124756,
"eval_runtime": 208.0242,
"eval_samples_per_second": 23.848,
"eval_steps_per_second": 2.985,
"eval_wer": 1.0,
"step": 3400
},
{
"epoch": 9.308510638297872,
"grad_norm": 2.0024287700653076,
"learning_rate": 0.0001253112840466926,
"loss": 4.1164,
"step": 3500
},
{
"epoch": 9.308510638297872,
"eval_loss": 4.130943298339844,
"eval_runtime": 208.5062,
"eval_samples_per_second": 23.793,
"eval_steps_per_second": 2.978,
"eval_wer": 1.0,
"step": 3500
},
{
"epoch": 9.574468085106384,
"eval_loss": 4.1129841804504395,
"eval_runtime": 207.7504,
"eval_samples_per_second": 23.88,
"eval_steps_per_second": 2.989,
"eval_wer": 1.0,
"step": 3600
},
{
"epoch": 9.840425531914894,
"eval_loss": 4.127650737762451,
"eval_runtime": 208.5195,
"eval_samples_per_second": 23.792,
"eval_steps_per_second": 2.978,
"eval_wer": 1.0,
"step": 3700
},
{
"epoch": 10.106382978723405,
"eval_loss": 4.1156158447265625,
"eval_runtime": 208.3668,
"eval_samples_per_second": 23.809,
"eval_steps_per_second": 2.98,
"eval_wer": 1.0,
"step": 3800
},
{
"epoch": 10.372340425531915,
"eval_loss": 4.1260600090026855,
"eval_runtime": 208.8761,
"eval_samples_per_second": 23.751,
"eval_steps_per_second": 2.973,
"eval_wer": 1.0,
"step": 3900
},
{
"epoch": 10.638297872340425,
"grad_norm": 1.807287573814392,
"learning_rate": 9.61284046692607e-05,
"loss": 4.1195,
"step": 4000
},
{
"epoch": 10.638297872340425,
"eval_loss": 4.130354404449463,
"eval_runtime": 208.2427,
"eval_samples_per_second": 23.823,
"eval_steps_per_second": 2.982,
"eval_wer": 1.0,
"step": 4000
},
{
"epoch": 10.904255319148936,
"eval_loss": 4.128208637237549,
"eval_runtime": 208.9946,
"eval_samples_per_second": 23.737,
"eval_steps_per_second": 2.971,
"eval_wer": 1.0,
"step": 4100
},
{
"epoch": 11.170212765957446,
"eval_loss": 4.11016845703125,
"eval_runtime": 214.217,
"eval_samples_per_second": 23.159,
"eval_steps_per_second": 2.899,
"eval_wer": 1.0,
"step": 4200
},
{
"epoch": 11.436170212765958,
"eval_loss": 4.111141681671143,
"eval_runtime": 208.356,
"eval_samples_per_second": 23.81,
"eval_steps_per_second": 2.98,
"eval_wer": 1.0,
"step": 4300
},
{
"epoch": 11.702127659574469,
"eval_loss": 4.111063003540039,
"eval_runtime": 209.2862,
"eval_samples_per_second": 23.704,
"eval_steps_per_second": 2.967,
"eval_wer": 1.0,
"step": 4400
},
{
"epoch": 11.96808510638298,
"grad_norm": 2.2066569328308105,
"learning_rate": 6.69455252918288e-05,
"loss": 4.1164,
"step": 4500
},
{
"epoch": 11.96808510638298,
"eval_loss": 4.112732410430908,
"eval_runtime": 213.9347,
"eval_samples_per_second": 23.189,
"eval_steps_per_second": 2.903,
"eval_wer": 1.0,
"step": 4500
},
{
"epoch": 12.23404255319149,
"eval_loss": 4.141692638397217,
"eval_runtime": 214.7394,
"eval_samples_per_second": 23.102,
"eval_steps_per_second": 2.892,
"eval_wer": 1.0,
"step": 4600
},
{
"epoch": 12.5,
"eval_loss": 4.139012336730957,
"eval_runtime": 223.6932,
"eval_samples_per_second": 22.178,
"eval_steps_per_second": 2.776,
"eval_wer": 1.0,
"step": 4700
},
{
"epoch": 12.76595744680851,
"eval_loss": 4.143847942352295,
"eval_runtime": 207.9672,
"eval_samples_per_second": 23.855,
"eval_steps_per_second": 2.986,
"eval_wer": 1.0,
"step": 4800
},
{
"epoch": 13.03191489361702,
"eval_loss": 4.122192859649658,
"eval_runtime": 208.5333,
"eval_samples_per_second": 23.79,
"eval_steps_per_second": 2.978,
"eval_wer": 1.0,
"step": 4900
},
{
"epoch": 13.297872340425531,
"grad_norm": 0.3912961781024933,
"learning_rate": 3.776264591439688e-05,
"loss": 4.1117,
"step": 5000
},
{
"epoch": 13.297872340425531,
"eval_loss": 4.115208625793457,
"eval_runtime": 208.8419,
"eval_samples_per_second": 23.755,
"eval_steps_per_second": 2.974,
"eval_wer": 1.0,
"step": 5000
},
{
"epoch": 13.563829787234042,
"eval_loss": 4.124714374542236,
"eval_runtime": 208.9077,
"eval_samples_per_second": 23.747,
"eval_steps_per_second": 2.973,
"eval_wer": 1.0,
"step": 5100
},
{
"epoch": 13.829787234042554,
"eval_loss": 4.124392032623291,
"eval_runtime": 208.9383,
"eval_samples_per_second": 23.744,
"eval_steps_per_second": 2.972,
"eval_wer": 1.0,
"step": 5200
},
{
"epoch": 14.095744680851064,
"eval_loss": 4.138058662414551,
"eval_runtime": 208.8904,
"eval_samples_per_second": 23.749,
"eval_steps_per_second": 2.973,
"eval_wer": 1.0,
"step": 5300
},
{
"epoch": 14.361702127659575,
"eval_loss": 4.13184928894043,
"eval_runtime": 209.1322,
"eval_samples_per_second": 23.722,
"eval_steps_per_second": 2.969,
"eval_wer": 1.0,
"step": 5400
},
{
"epoch": 14.627659574468085,
"grad_norm": 1.6473333835601807,
"learning_rate": 8.579766536964979e-06,
"loss": 4.1079,
"step": 5500
},
{
"epoch": 14.627659574468085,
"eval_loss": 4.131294250488281,
"eval_runtime": 209.206,
"eval_samples_per_second": 23.713,
"eval_steps_per_second": 2.968,
"eval_wer": 1.0,
"step": 5500
},
{
"epoch": 14.893617021276595,
"eval_loss": 4.130970478057861,
"eval_runtime": 209.3258,
"eval_samples_per_second": 23.7,
"eval_steps_per_second": 2.967,
"eval_wer": 1.0,
"step": 5600
},
{
"epoch": 15.0,
"step": 5640,
"total_flos": 7.889727743335047e+18,
"train_loss": 4.629210873867603,
"train_runtime": 29472.0498,
"train_samples_per_second": 6.124,
"train_steps_per_second": 0.191
}
],
"logging_steps": 500,
"max_steps": 5640,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 400,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7.889727743335047e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}