wav2vec2-xls-r-300m-phoneme-timit / trainer_state.json
jimregan's picture
checkpoint 10000
51d8d55
{
"best_metric": 0.3630259037017822,
"best_model_checkpoint": "./working/checkpoint-10000",
"epoch": 119.04761904761905,
"eval_steps": 1000,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.19,
"grad_norm": 5.874335765838623,
"learning_rate": 1.5e-06,
"loss": 12.5691,
"step": 100
},
{
"epoch": 2.38,
"grad_norm": 11.808794975280762,
"learning_rate": 2.9850000000000002e-06,
"loss": 9.8251,
"step": 200
},
{
"epoch": 3.57,
"grad_norm": 10.97357177734375,
"learning_rate": 4.485e-06,
"loss": 6.5329,
"step": 300
},
{
"epoch": 4.76,
"grad_norm": 10.752958297729492,
"learning_rate": 5.985e-06,
"loss": 5.2405,
"step": 400
},
{
"epoch": 5.95,
"grad_norm": 5.865973949432373,
"learning_rate": 7.485e-06,
"loss": 4.629,
"step": 500
},
{
"epoch": 7.14,
"grad_norm": 4.192840099334717,
"learning_rate": 8.985e-06,
"loss": 4.2437,
"step": 600
},
{
"epoch": 8.33,
"grad_norm": 3.2876224517822266,
"learning_rate": 1.0485e-05,
"loss": 3.9784,
"step": 700
},
{
"epoch": 9.52,
"grad_norm": 1.2407236099243164,
"learning_rate": 1.1985000000000001e-05,
"loss": 3.7904,
"step": 800
},
{
"epoch": 10.71,
"grad_norm": 0.6437963247299194,
"learning_rate": 1.3485e-05,
"loss": 3.6682,
"step": 900
},
{
"epoch": 11.9,
"grad_norm": 0.3295372724533081,
"learning_rate": 1.4985e-05,
"loss": 3.5325,
"step": 1000
},
{
"epoch": 11.9,
"eval_cer": 0.926601333602748,
"eval_loss": 3.4897494316101074,
"eval_runtime": 10.3726,
"eval_samples_per_second": 32.393,
"eval_steps_per_second": 4.049,
"eval_wer": 1.0,
"step": 1000
},
{
"epoch": 13.1,
"grad_norm": 0.61485356092453,
"learning_rate": 1.6485e-05,
"loss": 3.4934,
"step": 1100
},
{
"epoch": 14.29,
"grad_norm": 0.4778901934623718,
"learning_rate": 1.7985e-05,
"loss": 3.4662,
"step": 1200
},
{
"epoch": 15.48,
"grad_norm": 0.56944739818573,
"learning_rate": 1.9485e-05,
"loss": 3.4413,
"step": 1300
},
{
"epoch": 16.67,
"grad_norm": 0.37235376238822937,
"learning_rate": 2.0985e-05,
"loss": 3.4089,
"step": 1400
},
{
"epoch": 17.86,
"grad_norm": 0.4277956187725067,
"learning_rate": 2.2485000000000002e-05,
"loss": 3.3384,
"step": 1500
},
{
"epoch": 19.05,
"grad_norm": 0.50371253490448,
"learning_rate": 2.3985e-05,
"loss": 3.2455,
"step": 1600
},
{
"epoch": 20.24,
"grad_norm": 1.1383576393127441,
"learning_rate": 2.5485e-05,
"loss": 3.0681,
"step": 1700
},
{
"epoch": 21.43,
"grad_norm": 0.8668686747550964,
"learning_rate": 2.6985e-05,
"loss": 2.7949,
"step": 1800
},
{
"epoch": 22.62,
"grad_norm": 1.0731563568115234,
"learning_rate": 2.8485000000000003e-05,
"loss": 2.4866,
"step": 1900
},
{
"epoch": 23.81,
"grad_norm": 1.3317248821258545,
"learning_rate": 2.9985000000000002e-05,
"loss": 2.1973,
"step": 2000
},
{
"epoch": 23.81,
"eval_cer": 0.24030107092341887,
"eval_loss": 1.1350404024124146,
"eval_runtime": 10.3527,
"eval_samples_per_second": 32.455,
"eval_steps_per_second": 4.057,
"eval_wer": 0.839647119875454,
"step": 2000
},
{
"epoch": 25.0,
"grad_norm": 2.0239453315734863,
"learning_rate": 2.962875e-05,
"loss": 1.9821,
"step": 2100
},
{
"epoch": 26.19,
"grad_norm": 1.464921236038208,
"learning_rate": 2.925375e-05,
"loss": 1.853,
"step": 2200
},
{
"epoch": 27.38,
"grad_norm": 1.6508703231811523,
"learning_rate": 2.887875e-05,
"loss": 1.7547,
"step": 2300
},
{
"epoch": 28.57,
"grad_norm": 1.3476407527923584,
"learning_rate": 2.850375e-05,
"loss": 1.7171,
"step": 2400
},
{
"epoch": 29.76,
"grad_norm": 1.2977994680404663,
"learning_rate": 2.812875e-05,
"loss": 1.6498,
"step": 2500
},
{
"epoch": 30.95,
"grad_norm": 1.8536533117294312,
"learning_rate": 2.775375e-05,
"loss": 1.5965,
"step": 2600
},
{
"epoch": 32.14,
"grad_norm": 1.7063647508621216,
"learning_rate": 2.7378750000000003e-05,
"loss": 1.5744,
"step": 2700
},
{
"epoch": 33.33,
"grad_norm": 1.613274097442627,
"learning_rate": 2.700375e-05,
"loss": 1.5483,
"step": 2800
},
{
"epoch": 34.52,
"grad_norm": 1.6182752847671509,
"learning_rate": 2.662875e-05,
"loss": 1.5076,
"step": 2900
},
{
"epoch": 35.71,
"grad_norm": 1.9291083812713623,
"learning_rate": 2.6253750000000003e-05,
"loss": 1.4762,
"step": 3000
},
{
"epoch": 35.71,
"eval_cer": 0.15634471610426348,
"eval_loss": 0.527005672454834,
"eval_runtime": 10.3175,
"eval_samples_per_second": 32.566,
"eval_steps_per_second": 4.071,
"eval_wer": 0.6844836533471718,
"step": 3000
},
{
"epoch": 36.9,
"grad_norm": 1.7794642448425293,
"learning_rate": 2.587875e-05,
"loss": 1.4752,
"step": 3100
},
{
"epoch": 38.1,
"grad_norm": 1.7410004138946533,
"learning_rate": 2.550375e-05,
"loss": 1.4586,
"step": 3200
},
{
"epoch": 39.29,
"grad_norm": 1.6410831212997437,
"learning_rate": 2.512875e-05,
"loss": 1.4172,
"step": 3300
},
{
"epoch": 40.48,
"grad_norm": 2.350106954574585,
"learning_rate": 2.475375e-05,
"loss": 1.3751,
"step": 3400
},
{
"epoch": 41.67,
"grad_norm": 1.7794309854507446,
"learning_rate": 2.437875e-05,
"loss": 1.3516,
"step": 3500
},
{
"epoch": 42.86,
"grad_norm": 1.8536804914474487,
"learning_rate": 2.400375e-05,
"loss": 1.338,
"step": 3600
},
{
"epoch": 44.05,
"grad_norm": 2.043091058731079,
"learning_rate": 2.362875e-05,
"loss": 1.31,
"step": 3700
},
{
"epoch": 45.24,
"grad_norm": 2.556605577468872,
"learning_rate": 2.325375e-05,
"loss": 1.2737,
"step": 3800
},
{
"epoch": 46.43,
"grad_norm": 2.167360544204712,
"learning_rate": 2.2878750000000002e-05,
"loss": 1.2706,
"step": 3900
},
{
"epoch": 47.62,
"grad_norm": 1.933358907699585,
"learning_rate": 2.2503750000000003e-05,
"loss": 1.2409,
"step": 4000
},
{
"epoch": 47.62,
"eval_cer": 0.140280864821176,
"eval_loss": 0.41946256160736084,
"eval_runtime": 10.4076,
"eval_samples_per_second": 32.284,
"eval_steps_per_second": 4.036,
"eval_wer": 0.6331084587441619,
"step": 4000
},
{
"epoch": 48.81,
"grad_norm": 2.1927788257598877,
"learning_rate": 2.212875e-05,
"loss": 1.2476,
"step": 4100
},
{
"epoch": 50.0,
"grad_norm": 3.7042958736419678,
"learning_rate": 2.175375e-05,
"loss": 1.2211,
"step": 4200
},
{
"epoch": 51.19,
"grad_norm": 2.503298282623291,
"learning_rate": 2.13825e-05,
"loss": 1.1974,
"step": 4300
},
{
"epoch": 52.38,
"grad_norm": 2.378753423690796,
"learning_rate": 2.101125e-05,
"loss": 1.19,
"step": 4400
},
{
"epoch": 53.57,
"grad_norm": 6.145068645477295,
"learning_rate": 2.063625e-05,
"loss": 1.1734,
"step": 4500
},
{
"epoch": 54.76,
"grad_norm": 2.2741551399230957,
"learning_rate": 2.026125e-05,
"loss": 1.1664,
"step": 4600
},
{
"epoch": 55.95,
"grad_norm": 3.21976900100708,
"learning_rate": 1.988625e-05,
"loss": 1.1555,
"step": 4700
},
{
"epoch": 57.14,
"grad_norm": 3.06923508644104,
"learning_rate": 1.951125e-05,
"loss": 1.1391,
"step": 4800
},
{
"epoch": 58.33,
"grad_norm": 1.8809341192245483,
"learning_rate": 1.9136249999999998e-05,
"loss": 1.1271,
"step": 4900
},
{
"epoch": 59.52,
"grad_norm": 2.041844367980957,
"learning_rate": 1.876125e-05,
"loss": 1.1241,
"step": 5000
},
{
"epoch": 59.52,
"eval_cer": 0.13785613255203072,
"eval_loss": 0.38446417450904846,
"eval_runtime": 10.417,
"eval_samples_per_second": 32.255,
"eval_steps_per_second": 4.032,
"eval_wer": 0.63622210690192,
"step": 5000
},
{
"epoch": 60.71,
"grad_norm": 2.3043465614318848,
"learning_rate": 1.838625e-05,
"loss": 1.1042,
"step": 5100
},
{
"epoch": 61.9,
"grad_norm": 3.682835340499878,
"learning_rate": 1.801125e-05,
"loss": 1.0921,
"step": 5200
},
{
"epoch": 63.1,
"grad_norm": 4.466809272766113,
"learning_rate": 1.7636250000000002e-05,
"loss": 1.095,
"step": 5300
},
{
"epoch": 64.29,
"grad_norm": 2.615339994430542,
"learning_rate": 1.726125e-05,
"loss": 1.1057,
"step": 5400
},
{
"epoch": 65.48,
"grad_norm": 3.483346700668335,
"learning_rate": 1.688625e-05,
"loss": 1.0553,
"step": 5500
},
{
"epoch": 66.67,
"grad_norm": 2.141965866088867,
"learning_rate": 1.651125e-05,
"loss": 1.0656,
"step": 5600
},
{
"epoch": 67.86,
"grad_norm": 2.2111611366271973,
"learning_rate": 1.613625e-05,
"loss": 1.0673,
"step": 5700
},
{
"epoch": 69.05,
"grad_norm": 2.071429491043091,
"learning_rate": 1.576125e-05,
"loss": 1.0632,
"step": 5800
},
{
"epoch": 70.24,
"grad_norm": 4.86116886138916,
"learning_rate": 1.538625e-05,
"loss": 1.0447,
"step": 5900
},
{
"epoch": 71.43,
"grad_norm": 3.3076369762420654,
"learning_rate": 1.5011250000000001e-05,
"loss": 1.024,
"step": 6000
},
{
"epoch": 71.43,
"eval_cer": 0.13548191553849262,
"eval_loss": 0.3715837895870209,
"eval_runtime": 10.3955,
"eval_samples_per_second": 32.322,
"eval_steps_per_second": 4.04,
"eval_wer": 0.6320705760249092,
"step": 6000
},
{
"epoch": 72.62,
"grad_norm": 2.3496508598327637,
"learning_rate": 1.463625e-05,
"loss": 1.0379,
"step": 6100
},
{
"epoch": 73.81,
"grad_norm": 2.621004343032837,
"learning_rate": 1.426125e-05,
"loss": 1.0515,
"step": 6200
},
{
"epoch": 75.0,
"grad_norm": 5.240926742553711,
"learning_rate": 1.388625e-05,
"loss": 1.0253,
"step": 6300
},
{
"epoch": 76.19,
"grad_norm": 2.9943532943725586,
"learning_rate": 1.351125e-05,
"loss": 1.0131,
"step": 6400
},
{
"epoch": 77.38,
"grad_norm": 2.475804328918457,
"learning_rate": 1.3136250000000001e-05,
"loss": 1.0227,
"step": 6500
},
{
"epoch": 78.57,
"grad_norm": 2.5056631565093994,
"learning_rate": 1.2761250000000001e-05,
"loss": 1.0025,
"step": 6600
},
{
"epoch": 79.76,
"grad_norm": 3.9102323055267334,
"learning_rate": 1.238625e-05,
"loss": 1.0181,
"step": 6700
},
{
"epoch": 80.95,
"grad_norm": 3.3800106048583984,
"learning_rate": 1.201125e-05,
"loss": 0.9892,
"step": 6800
},
{
"epoch": 82.14,
"grad_norm": 2.165987014770508,
"learning_rate": 1.164e-05,
"loss": 0.9822,
"step": 6900
},
{
"epoch": 83.33,
"grad_norm": 2.426816463470459,
"learning_rate": 1.1265e-05,
"loss": 0.9922,
"step": 7000
},
{
"epoch": 83.33,
"eval_cer": 0.13310769852495455,
"eval_loss": 0.3727741539478302,
"eval_runtime": 10.3942,
"eval_samples_per_second": 32.326,
"eval_steps_per_second": 4.041,
"eval_wer": 0.628956927867151,
"step": 7000
},
{
"epoch": 84.52,
"grad_norm": 3.203552007675171,
"learning_rate": 1.089e-05,
"loss": 0.9995,
"step": 7100
},
{
"epoch": 85.71,
"grad_norm": 2.827246904373169,
"learning_rate": 1.0515e-05,
"loss": 0.9683,
"step": 7200
},
{
"epoch": 86.9,
"grad_norm": 2.9927895069122314,
"learning_rate": 1.0140000000000001e-05,
"loss": 0.9864,
"step": 7300
},
{
"epoch": 88.1,
"grad_norm": 2.151737928390503,
"learning_rate": 9.765e-06,
"loss": 0.9744,
"step": 7400
},
{
"epoch": 89.29,
"grad_norm": 2.5920581817626953,
"learning_rate": 9.39e-06,
"loss": 0.9794,
"step": 7500
},
{
"epoch": 90.48,
"grad_norm": 3.2127621173858643,
"learning_rate": 9.015e-06,
"loss": 0.9622,
"step": 7600
},
{
"epoch": 91.67,
"grad_norm": 3.541879892349243,
"learning_rate": 8.64e-06,
"loss": 0.9624,
"step": 7700
},
{
"epoch": 92.86,
"grad_norm": 2.827958345413208,
"learning_rate": 8.265000000000001e-06,
"loss": 0.9637,
"step": 7800
},
{
"epoch": 94.05,
"grad_norm": 2.648591995239258,
"learning_rate": 7.89e-06,
"loss": 0.9684,
"step": 7900
},
{
"epoch": 95.24,
"grad_norm": 4.264640808105469,
"learning_rate": 7.515e-06,
"loss": 0.9432,
"step": 8000
},
{
"epoch": 95.24,
"eval_cer": 0.13209739341281068,
"eval_loss": 0.3648131787776947,
"eval_runtime": 10.4236,
"eval_samples_per_second": 32.234,
"eval_steps_per_second": 4.029,
"eval_wer": 0.6170212765957447,
"step": 8000
},
{
"epoch": 96.43,
"grad_norm": 2.8603055477142334,
"learning_rate": 7.14e-06,
"loss": 0.9576,
"step": 8100
},
{
"epoch": 97.62,
"grad_norm": 2.931117296218872,
"learning_rate": 6.7650000000000005e-06,
"loss": 0.9579,
"step": 8200
},
{
"epoch": 98.81,
"grad_norm": 3.449780225753784,
"learning_rate": 6.39e-06,
"loss": 0.9535,
"step": 8300
},
{
"epoch": 100.0,
"grad_norm": 4.3435468673706055,
"learning_rate": 6.015000000000001e-06,
"loss": 0.9463,
"step": 8400
},
{
"epoch": 101.19,
"grad_norm": 2.2839837074279785,
"learning_rate": 5.64e-06,
"loss": 0.9413,
"step": 8500
},
{
"epoch": 102.38,
"grad_norm": 3.1021485328674316,
"learning_rate": 5.2649999999999996e-06,
"loss": 0.9436,
"step": 8600
},
{
"epoch": 103.57,
"grad_norm": 2.9421229362487793,
"learning_rate": 4.890000000000001e-06,
"loss": 0.939,
"step": 8700
},
{
"epoch": 104.76,
"grad_norm": 2.0578436851501465,
"learning_rate": 4.515e-06,
"loss": 0.9338,
"step": 8800
},
{
"epoch": 105.95,
"grad_norm": 3.5860297679901123,
"learning_rate": 4.14e-06,
"loss": 0.9315,
"step": 8900
},
{
"epoch": 107.14,
"grad_norm": 2.1002159118652344,
"learning_rate": 3.765e-06,
"loss": 0.9279,
"step": 9000
},
{
"epoch": 107.14,
"eval_cer": 0.13245100020206102,
"eval_loss": 0.3642527461051941,
"eval_runtime": 10.5077,
"eval_samples_per_second": 31.976,
"eval_steps_per_second": 3.997,
"eval_wer": 0.6248053969901401,
"step": 9000
},
{
"epoch": 108.33,
"grad_norm": 3.5053226947784424,
"learning_rate": 3.3975e-06,
"loss": 0.9489,
"step": 9100
},
{
"epoch": 109.52,
"grad_norm": 2.178657054901123,
"learning_rate": 3.0225000000000003e-06,
"loss": 0.9218,
"step": 9200
},
{
"epoch": 110.71,
"grad_norm": 2.9659178256988525,
"learning_rate": 2.6475e-06,
"loss": 0.928,
"step": 9300
},
{
"epoch": 111.9,
"grad_norm": 3.755510091781616,
"learning_rate": 2.2725e-06,
"loss": 0.9244,
"step": 9400
},
{
"epoch": 113.1,
"grad_norm": 2.2474422454833984,
"learning_rate": 1.8975e-06,
"loss": 0.9269,
"step": 9500
},
{
"epoch": 114.29,
"grad_norm": 4.289571285247803,
"learning_rate": 1.5225000000000002e-06,
"loss": 0.9353,
"step": 9600
},
{
"epoch": 115.48,
"grad_norm": 3.7131989002227783,
"learning_rate": 1.1475e-06,
"loss": 0.9169,
"step": 9700
},
{
"epoch": 116.67,
"grad_norm": 2.692605972290039,
"learning_rate": 7.725e-07,
"loss": 0.9325,
"step": 9800
},
{
"epoch": 117.86,
"grad_norm": 3.175621509552002,
"learning_rate": 3.975e-07,
"loss": 0.9163,
"step": 9900
},
{
"epoch": 119.05,
"grad_norm": 9.366300582885742,
"learning_rate": 2.25e-08,
"loss": 0.9268,
"step": 10000
},
{
"epoch": 119.05,
"eval_cer": 0.13159224085673873,
"eval_loss": 0.3630259037017822,
"eval_runtime": 10.5083,
"eval_samples_per_second": 31.975,
"eval_steps_per_second": 3.997,
"eval_wer": 0.6242864556305138,
"step": 10000
}
],
"logging_steps": 100,
"max_steps": 10000,
"num_input_tokens_seen": 0,
"num_train_epochs": 120,
"save_steps": 1000,
"total_flos": 3.012648370984383e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}