git-base-refines / trainer_state.json
kurileo's picture
Upload 8 files
508becb
raw
history blame
35.5 kB
{
"best_metric": 0.3922309875488281,
"best_model_checkpoint": "/data/users/yanyang/Projects/COCO_Caption_Refine/debug/git/2023-09-12-11-13-17_git-base/checkpoint-4800",
"epoch": 2.9702970297029703,
"global_step": 4800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 4.948432343234324e-05,
"loss": 6.7185,
"step": 50
},
{
"epoch": 0.03,
"eval_loss": 4.150215148925781,
"eval_runtime": 2.7394,
"eval_samples_per_second": 11.682,
"eval_steps_per_second": 5.841,
"eval_wer_score": 2.6076555023923444,
"step": 50
},
{
"epoch": 0.06,
"learning_rate": 4.8968646864686466e-05,
"loss": 2.2563,
"step": 100
},
{
"epoch": 0.06,
"eval_loss": 0.7511033415794373,
"eval_runtime": 2.511,
"eval_samples_per_second": 12.744,
"eval_steps_per_second": 6.372,
"eval_wer_score": 1.8782079164854284,
"step": 100
},
{
"epoch": 0.09,
"learning_rate": 4.8452970297029704e-05,
"loss": 0.6084,
"step": 150
},
{
"epoch": 0.09,
"eval_loss": 0.5207427144050598,
"eval_runtime": 3.1225,
"eval_samples_per_second": 10.248,
"eval_steps_per_second": 5.124,
"eval_wer_score": 2.3458025228360158,
"step": 150
},
{
"epoch": 0.12,
"learning_rate": 4.793729372937294e-05,
"loss": 0.5236,
"step": 200
},
{
"epoch": 0.12,
"eval_loss": 0.48739808797836304,
"eval_runtime": 3.4202,
"eval_samples_per_second": 9.356,
"eval_steps_per_second": 4.678,
"eval_wer_score": 2.6250543714658545,
"step": 200
},
{
"epoch": 0.15,
"learning_rate": 4.7421617161716174e-05,
"loss": 0.5022,
"step": 250
},
{
"epoch": 0.15,
"eval_loss": 0.4747964143753052,
"eval_runtime": 3.5933,
"eval_samples_per_second": 8.906,
"eval_steps_per_second": 4.453,
"eval_wer_score": 2.889952153110048,
"step": 250
},
{
"epoch": 0.19,
"learning_rate": 4.6905940594059406e-05,
"loss": 0.489,
"step": 300
},
{
"epoch": 0.19,
"eval_loss": 0.4658946096897125,
"eval_runtime": 3.2552,
"eval_samples_per_second": 9.83,
"eval_steps_per_second": 4.915,
"eval_wer_score": 3.058721183123097,
"step": 300
},
{
"epoch": 0.22,
"learning_rate": 4.6390264026402644e-05,
"loss": 0.477,
"step": 350
},
{
"epoch": 0.22,
"eval_loss": 0.46055227518081665,
"eval_runtime": 3.7594,
"eval_samples_per_second": 8.512,
"eval_steps_per_second": 4.256,
"eval_wer_score": 2.9904306220095696,
"step": 350
},
{
"epoch": 0.25,
"learning_rate": 4.5874587458745876e-05,
"loss": 0.4702,
"step": 400
},
{
"epoch": 0.25,
"eval_loss": 0.4569026827812195,
"eval_runtime": 3.9386,
"eval_samples_per_second": 8.125,
"eval_steps_per_second": 4.062,
"eval_wer_score": 2.941278816876903,
"step": 400
},
{
"epoch": 0.28,
"learning_rate": 4.5358910891089114e-05,
"loss": 0.4673,
"step": 450
},
{
"epoch": 0.28,
"eval_loss": 0.45087775588035583,
"eval_runtime": 2.9403,
"eval_samples_per_second": 10.883,
"eval_steps_per_second": 5.442,
"eval_wer_score": 3.01739886907351,
"step": 450
},
{
"epoch": 0.31,
"learning_rate": 4.4843234323432346e-05,
"loss": 0.4633,
"step": 500
},
{
"epoch": 0.31,
"eval_loss": 0.4465155601501465,
"eval_runtime": 3.3332,
"eval_samples_per_second": 9.6,
"eval_steps_per_second": 4.8,
"eval_wer_score": 3.111787733797303,
"step": 500
},
{
"epoch": 0.34,
"learning_rate": 4.432755775577558e-05,
"loss": 0.4579,
"step": 550
},
{
"epoch": 0.34,
"eval_loss": 0.44231322407722473,
"eval_runtime": 3.3294,
"eval_samples_per_second": 9.611,
"eval_steps_per_second": 4.806,
"eval_wer_score": 3.1004784688995217,
"step": 550
},
{
"epoch": 0.37,
"learning_rate": 4.3811881188118816e-05,
"loss": 0.4522,
"step": 600
},
{
"epoch": 0.37,
"eval_loss": 0.4409943222999573,
"eval_runtime": 4.0034,
"eval_samples_per_second": 7.993,
"eval_steps_per_second": 3.997,
"eval_wer_score": 3.08916920400174,
"step": 600
},
{
"epoch": 0.4,
"learning_rate": 4.329620462046205e-05,
"loss": 0.4495,
"step": 650
},
{
"epoch": 0.4,
"eval_loss": 0.4370802044868469,
"eval_runtime": 2.7858,
"eval_samples_per_second": 11.487,
"eval_steps_per_second": 5.743,
"eval_wer_score": 3.1792083514571554,
"step": 650
},
{
"epoch": 0.43,
"learning_rate": 4.278052805280528e-05,
"loss": 0.4498,
"step": 700
},
{
"epoch": 0.43,
"eval_loss": 0.43357548117637634,
"eval_runtime": 2.6149,
"eval_samples_per_second": 12.238,
"eval_steps_per_second": 6.119,
"eval_wer_score": 3.1222270552414093,
"step": 700
},
{
"epoch": 0.46,
"learning_rate": 4.226485148514852e-05,
"loss": 0.4461,
"step": 750
},
{
"epoch": 0.46,
"eval_loss": 0.4354948401451111,
"eval_runtime": 2.7939,
"eval_samples_per_second": 11.454,
"eval_steps_per_second": 5.727,
"eval_wer_score": 3.187037842540235,
"step": 750
},
{
"epoch": 0.5,
"learning_rate": 4.174917491749175e-05,
"loss": 0.4435,
"step": 800
},
{
"epoch": 0.5,
"eval_loss": 0.4297381043434143,
"eval_runtime": 2.5337,
"eval_samples_per_second": 12.63,
"eval_steps_per_second": 6.315,
"eval_wer_score": 3.207046541974772,
"step": 800
},
{
"epoch": 0.53,
"learning_rate": 4.123349834983499e-05,
"loss": 0.4392,
"step": 850
},
{
"epoch": 0.53,
"eval_loss": 0.4316774606704712,
"eval_runtime": 2.6742,
"eval_samples_per_second": 11.966,
"eval_steps_per_second": 5.983,
"eval_wer_score": 3.1857329273597217,
"step": 850
},
{
"epoch": 0.56,
"learning_rate": 4.071782178217822e-05,
"loss": 0.4385,
"step": 900
},
{
"epoch": 0.56,
"eval_loss": 0.42789211869239807,
"eval_runtime": 2.5419,
"eval_samples_per_second": 12.589,
"eval_steps_per_second": 6.294,
"eval_wer_score": 3.186602870813397,
"step": 900
},
{
"epoch": 0.59,
"learning_rate": 4.020214521452145e-05,
"loss": 0.4352,
"step": 950
},
{
"epoch": 0.59,
"eval_loss": 0.4274422526359558,
"eval_runtime": 2.5697,
"eval_samples_per_second": 12.453,
"eval_steps_per_second": 6.226,
"eval_wer_score": 3.23836450630709,
"step": 950
},
{
"epoch": 0.62,
"learning_rate": 3.968646864686469e-05,
"loss": 0.4354,
"step": 1000
},
{
"epoch": 0.62,
"eval_loss": 0.42688965797424316,
"eval_runtime": 2.9492,
"eval_samples_per_second": 10.85,
"eval_steps_per_second": 5.425,
"eval_wer_score": 3.192692474989126,
"step": 1000
},
{
"epoch": 0.65,
"learning_rate": 3.917079207920793e-05,
"loss": 0.4336,
"step": 1050
},
{
"epoch": 0.65,
"eval_loss": 0.42364591360092163,
"eval_runtime": 3.3776,
"eval_samples_per_second": 9.474,
"eval_steps_per_second": 4.737,
"eval_wer_score": 3.1705089169204004,
"step": 1050
},
{
"epoch": 0.68,
"learning_rate": 3.865511551155115e-05,
"loss": 0.4315,
"step": 1100
},
{
"epoch": 0.68,
"eval_loss": 0.42294472455978394,
"eval_runtime": 3.4043,
"eval_samples_per_second": 9.4,
"eval_steps_per_second": 4.7,
"eval_wer_score": 3.2618529795563287,
"step": 1100
},
{
"epoch": 0.71,
"learning_rate": 3.813943894389439e-05,
"loss": 0.4297,
"step": 1150
},
{
"epoch": 0.71,
"eval_loss": 0.4200877547264099,
"eval_runtime": 3.2244,
"eval_samples_per_second": 9.924,
"eval_steps_per_second": 4.962,
"eval_wer_score": 3.2818616789908654,
"step": 1150
},
{
"epoch": 0.74,
"learning_rate": 3.762376237623763e-05,
"loss": 0.429,
"step": 1200
},
{
"epoch": 0.74,
"eval_loss": 0.4193739593029022,
"eval_runtime": 3.0348,
"eval_samples_per_second": 10.544,
"eval_steps_per_second": 5.272,
"eval_wer_score": 3.281426707264028,
"step": 1200
},
{
"epoch": 0.77,
"learning_rate": 3.710808580858086e-05,
"loss": 0.4272,
"step": 1250
},
{
"epoch": 0.77,
"eval_loss": 0.41733482480049133,
"eval_runtime": 3.4043,
"eval_samples_per_second": 9.4,
"eval_steps_per_second": 4.7,
"eval_wer_score": 3.2501087429317095,
"step": 1250
},
{
"epoch": 0.8,
"learning_rate": 3.659240924092409e-05,
"loss": 0.4266,
"step": 1300
},
{
"epoch": 0.8,
"eval_loss": 0.4167550206184387,
"eval_runtime": 3.1991,
"eval_samples_per_second": 10.003,
"eval_steps_per_second": 5.001,
"eval_wer_score": 3.240539364941279,
"step": 1300
},
{
"epoch": 0.84,
"learning_rate": 3.607673267326733e-05,
"loss": 0.4257,
"step": 1350
},
{
"epoch": 0.84,
"eval_loss": 0.4143298268318176,
"eval_runtime": 3.1177,
"eval_samples_per_second": 10.264,
"eval_steps_per_second": 5.132,
"eval_wer_score": 3.2570682905611137,
"step": 1350
},
{
"epoch": 0.87,
"learning_rate": 3.556105610561056e-05,
"loss": 0.424,
"step": 1400
},
{
"epoch": 0.87,
"eval_loss": 0.4156000018119812,
"eval_runtime": 3.2132,
"eval_samples_per_second": 9.959,
"eval_steps_per_second": 4.979,
"eval_wer_score": 3.2035667681600697,
"step": 1400
},
{
"epoch": 0.9,
"learning_rate": 3.50453795379538e-05,
"loss": 0.4249,
"step": 1450
},
{
"epoch": 0.9,
"eval_loss": 0.4149695038795471,
"eval_runtime": 3.367,
"eval_samples_per_second": 9.504,
"eval_steps_per_second": 4.752,
"eval_wer_score": 3.2470639408438453,
"step": 1450
},
{
"epoch": 0.93,
"learning_rate": 3.452970297029703e-05,
"loss": 0.422,
"step": 1500
},
{
"epoch": 0.93,
"eval_loss": 0.4136950373649597,
"eval_runtime": 3.0485,
"eval_samples_per_second": 10.497,
"eval_steps_per_second": 5.249,
"eval_wer_score": 3.257938234014789,
"step": 1500
},
{
"epoch": 0.96,
"learning_rate": 3.4014026402640264e-05,
"loss": 0.4193,
"step": 1550
},
{
"epoch": 0.96,
"eval_loss": 0.41170167922973633,
"eval_runtime": 3.4654,
"eval_samples_per_second": 9.234,
"eval_steps_per_second": 4.617,
"eval_wer_score": 3.2470639408438453,
"step": 1550
},
{
"epoch": 0.99,
"learning_rate": 3.34983498349835e-05,
"loss": 0.4179,
"step": 1600
},
{
"epoch": 0.99,
"eval_loss": 0.412392795085907,
"eval_runtime": 3.3054,
"eval_samples_per_second": 9.681,
"eval_steps_per_second": 4.841,
"eval_wer_score": 3.2440191387559807,
"step": 1600
},
{
"epoch": 1.02,
"learning_rate": 3.2982673267326734e-05,
"loss": 0.4164,
"step": 1650
},
{
"epoch": 1.02,
"eval_loss": 0.41018491983413696,
"eval_runtime": 3.215,
"eval_samples_per_second": 9.953,
"eval_steps_per_second": 4.977,
"eval_wer_score": 3.2679425837320575,
"step": 1650
},
{
"epoch": 1.05,
"learning_rate": 3.2466996699669965e-05,
"loss": 0.4121,
"step": 1700
},
{
"epoch": 1.05,
"eval_loss": 0.4093266427516937,
"eval_runtime": 3.3525,
"eval_samples_per_second": 9.545,
"eval_steps_per_second": 4.773,
"eval_wer_score": 3.222705524140931,
"step": 1700
},
{
"epoch": 1.08,
"learning_rate": 3.1951320132013203e-05,
"loss": 0.4103,
"step": 1750
},
{
"epoch": 1.08,
"eval_loss": 0.4072987139225006,
"eval_runtime": 3.3812,
"eval_samples_per_second": 9.464,
"eval_steps_per_second": 4.732,
"eval_wer_score": 3.1874728142670725,
"step": 1750
},
{
"epoch": 1.11,
"learning_rate": 3.1435643564356435e-05,
"loss": 0.411,
"step": 1800
},
{
"epoch": 1.11,
"eval_loss": 0.4119817614555359,
"eval_runtime": 1.9138,
"eval_samples_per_second": 16.721,
"eval_steps_per_second": 8.36,
"eval_wer_score": 3.201391909525881,
"step": 1800
},
{
"epoch": 1.14,
"learning_rate": 3.0919966996699673e-05,
"loss": 0.4095,
"step": 1850
},
{
"epoch": 1.14,
"eval_loss": 0.4093400537967682,
"eval_runtime": 2.0537,
"eval_samples_per_second": 15.582,
"eval_steps_per_second": 7.791,
"eval_wer_score": 3.218790778599391,
"step": 1850
},
{
"epoch": 1.18,
"learning_rate": 3.0404290429042902e-05,
"loss": 0.4093,
"step": 1900
},
{
"epoch": 1.18,
"eval_loss": 0.4096407890319824,
"eval_runtime": 1.9992,
"eval_samples_per_second": 16.006,
"eval_steps_per_second": 8.003,
"eval_wer_score": 3.1705089169204004,
"step": 1900
},
{
"epoch": 1.21,
"learning_rate": 2.988861386138614e-05,
"loss": 0.4081,
"step": 1950
},
{
"epoch": 1.21,
"eval_loss": 0.4094192683696747,
"eval_runtime": 2.0819,
"eval_samples_per_second": 15.371,
"eval_steps_per_second": 7.685,
"eval_wer_score": 3.1966072205306655,
"step": 1950
},
{
"epoch": 1.24,
"learning_rate": 2.9372937293729375e-05,
"loss": 0.4083,
"step": 2000
},
{
"epoch": 1.24,
"eval_loss": 0.4079236090183258,
"eval_runtime": 1.9742,
"eval_samples_per_second": 16.209,
"eval_steps_per_second": 8.104,
"eval_wer_score": 3.2231404958677685,
"step": 2000
},
{
"epoch": 1.27,
"learning_rate": 2.885726072607261e-05,
"loss": 0.4065,
"step": 2050
},
{
"epoch": 1.27,
"eval_loss": 0.4078274965286255,
"eval_runtime": 1.9767,
"eval_samples_per_second": 16.189,
"eval_steps_per_second": 8.094,
"eval_wer_score": 3.2292301000434973,
"step": 2050
},
{
"epoch": 1.3,
"learning_rate": 2.834158415841584e-05,
"loss": 0.4074,
"step": 2100
},
{
"epoch": 1.3,
"eval_loss": 0.40426379442214966,
"eval_runtime": 2.1917,
"eval_samples_per_second": 14.601,
"eval_steps_per_second": 7.3,
"eval_wer_score": 3.2127011744236627,
"step": 2100
},
{
"epoch": 1.33,
"learning_rate": 2.7825907590759077e-05,
"loss": 0.4066,
"step": 2150
},
{
"epoch": 1.33,
"eval_loss": 0.40665364265441895,
"eval_runtime": 2.255,
"eval_samples_per_second": 14.191,
"eval_steps_per_second": 7.095,
"eval_wer_score": 3.2053066550674205,
"step": 2150
},
{
"epoch": 1.36,
"learning_rate": 2.731023102310231e-05,
"loss": 0.405,
"step": 2200
},
{
"epoch": 1.36,
"eval_loss": 0.4042993485927582,
"eval_runtime": 2.1192,
"eval_samples_per_second": 15.1,
"eval_steps_per_second": 7.55,
"eval_wer_score": 3.2448890822096566,
"step": 2200
},
{
"epoch": 1.39,
"learning_rate": 2.6794554455445547e-05,
"loss": 0.4051,
"step": 2250
},
{
"epoch": 1.39,
"eval_loss": 0.4049427807331085,
"eval_runtime": 1.9064,
"eval_samples_per_second": 16.786,
"eval_steps_per_second": 8.393,
"eval_wer_score": 3.2109612875163114,
"step": 2250
},
{
"epoch": 1.42,
"learning_rate": 2.6278877887788778e-05,
"loss": 0.4045,
"step": 2300
},
{
"epoch": 1.42,
"eval_loss": 0.4028187394142151,
"eval_runtime": 2.0181,
"eval_samples_per_second": 15.856,
"eval_steps_per_second": 7.928,
"eval_wer_score": 3.2035667681600697,
"step": 2300
},
{
"epoch": 1.45,
"learning_rate": 2.5763201320132013e-05,
"loss": 0.4045,
"step": 2350
},
{
"epoch": 1.45,
"eval_loss": 0.4024648070335388,
"eval_runtime": 2.027,
"eval_samples_per_second": 15.787,
"eval_steps_per_second": 7.894,
"eval_wer_score": 3.1757285776424533,
"step": 2350
},
{
"epoch": 1.49,
"learning_rate": 2.5247524752475248e-05,
"loss": 0.406,
"step": 2400
},
{
"epoch": 1.49,
"eval_loss": 0.400738000869751,
"eval_runtime": 2.1415,
"eval_samples_per_second": 14.942,
"eval_steps_per_second": 7.471,
"eval_wer_score": 3.204001739886907,
"step": 2400
},
{
"epoch": 1.52,
"learning_rate": 2.4731848184818483e-05,
"loss": 0.4021,
"step": 2450
},
{
"epoch": 1.52,
"eval_loss": 0.40221601724624634,
"eval_runtime": 2.237,
"eval_samples_per_second": 14.305,
"eval_steps_per_second": 7.152,
"eval_wer_score": 3.144410613310135,
"step": 2450
},
{
"epoch": 1.55,
"learning_rate": 2.4216171617161718e-05,
"loss": 0.4026,
"step": 2500
},
{
"epoch": 1.55,
"eval_loss": 0.4028313159942627,
"eval_runtime": 2.2063,
"eval_samples_per_second": 14.504,
"eval_steps_per_second": 7.252,
"eval_wer_score": 3.168769030013049,
"step": 2500
},
{
"epoch": 1.58,
"learning_rate": 2.370049504950495e-05,
"loss": 0.4014,
"step": 2550
},
{
"epoch": 1.58,
"eval_loss": 0.4026516079902649,
"eval_runtime": 2.131,
"eval_samples_per_second": 15.016,
"eval_steps_per_second": 7.508,
"eval_wer_score": 3.2000869943453676,
"step": 2550
},
{
"epoch": 1.61,
"learning_rate": 2.3184818481848185e-05,
"loss": 0.4015,
"step": 2600
},
{
"epoch": 1.61,
"eval_loss": 0.402204692363739,
"eval_runtime": 2.0851,
"eval_samples_per_second": 15.347,
"eval_steps_per_second": 7.673,
"eval_wer_score": 3.19182253153545,
"step": 2600
},
{
"epoch": 1.64,
"learning_rate": 2.266914191419142e-05,
"loss": 0.401,
"step": 2650
},
{
"epoch": 1.64,
"eval_loss": 0.40174347162246704,
"eval_runtime": 2.1944,
"eval_samples_per_second": 14.583,
"eval_steps_per_second": 7.291,
"eval_wer_score": 3.189212701174424,
"step": 2650
},
{
"epoch": 1.67,
"learning_rate": 2.2153465346534655e-05,
"loss": 0.4007,
"step": 2700
},
{
"epoch": 1.67,
"eval_loss": 0.40014830231666565,
"eval_runtime": 2.1544,
"eval_samples_per_second": 14.853,
"eval_steps_per_second": 7.427,
"eval_wer_score": 3.2148760330578514,
"step": 2700
},
{
"epoch": 1.7,
"learning_rate": 2.1637788778877886e-05,
"loss": 0.399,
"step": 2750
},
{
"epoch": 1.7,
"eval_loss": 0.3999301791191101,
"eval_runtime": 2.196,
"eval_samples_per_second": 14.572,
"eval_steps_per_second": 7.286,
"eval_wer_score": 3.177468464549804,
"step": 2750
},
{
"epoch": 1.73,
"learning_rate": 2.1122112211221125e-05,
"loss": 0.4004,
"step": 2800
},
{
"epoch": 1.73,
"eval_loss": 0.40041935443878174,
"eval_runtime": 2.2406,
"eval_samples_per_second": 14.282,
"eval_steps_per_second": 7.141,
"eval_wer_score": 3.186602870813397,
"step": 2800
},
{
"epoch": 1.76,
"learning_rate": 2.0606435643564356e-05,
"loss": 0.3988,
"step": 2850
},
{
"epoch": 1.76,
"eval_loss": 0.4005739092826843,
"eval_runtime": 2.1589,
"eval_samples_per_second": 14.822,
"eval_steps_per_second": 7.411,
"eval_wer_score": 3.2235754675946064,
"step": 2850
},
{
"epoch": 1.79,
"learning_rate": 2.009075907590759e-05,
"loss": 0.3985,
"step": 2900
},
{
"epoch": 1.79,
"eval_loss": 0.4012880325317383,
"eval_runtime": 2.1243,
"eval_samples_per_second": 15.064,
"eval_steps_per_second": 7.532,
"eval_wer_score": 3.2083514571552847,
"step": 2900
},
{
"epoch": 1.83,
"learning_rate": 1.9575082508250826e-05,
"loss": 0.3995,
"step": 2950
},
{
"epoch": 1.83,
"eval_loss": 0.3977855443954468,
"eval_runtime": 2.1918,
"eval_samples_per_second": 14.6,
"eval_steps_per_second": 7.3,
"eval_wer_score": 3.192257503262288,
"step": 2950
},
{
"epoch": 1.86,
"learning_rate": 1.905940594059406e-05,
"loss": 0.3975,
"step": 3000
},
{
"epoch": 1.86,
"eval_loss": 0.39725542068481445,
"eval_runtime": 2.2663,
"eval_samples_per_second": 14.12,
"eval_steps_per_second": 7.06,
"eval_wer_score": 3.174858634188778,
"step": 3000
},
{
"epoch": 1.89,
"learning_rate": 1.8543729372937293e-05,
"loss": 0.3976,
"step": 3050
},
{
"epoch": 1.89,
"eval_loss": 0.39580366015434265,
"eval_runtime": 2.2795,
"eval_samples_per_second": 14.038,
"eval_steps_per_second": 7.019,
"eval_wer_score": 3.12396694214876,
"step": 3050
},
{
"epoch": 1.92,
"learning_rate": 1.8028052805280528e-05,
"loss": 0.3977,
"step": 3100
},
{
"epoch": 1.92,
"eval_loss": 0.3961202800273895,
"eval_runtime": 2.1696,
"eval_samples_per_second": 14.749,
"eval_steps_per_second": 7.374,
"eval_wer_score": 3.165724227925185,
"step": 3100
},
{
"epoch": 1.95,
"learning_rate": 1.7512376237623763e-05,
"loss": 0.3945,
"step": 3150
},
{
"epoch": 1.95,
"eval_loss": 0.396453857421875,
"eval_runtime": 1.988,
"eval_samples_per_second": 16.097,
"eval_steps_per_second": 8.048,
"eval_wer_score": 3.1805132666376688,
"step": 3150
},
{
"epoch": 1.98,
"learning_rate": 1.6996699669966998e-05,
"loss": 0.3962,
"step": 3200
},
{
"epoch": 1.98,
"eval_loss": 0.39566469192504883,
"eval_runtime": 1.9188,
"eval_samples_per_second": 16.677,
"eval_steps_per_second": 8.339,
"eval_wer_score": 3.1852979556328838,
"step": 3200
},
{
"epoch": 2.01,
"learning_rate": 1.648102310231023e-05,
"loss": 0.3953,
"step": 3250
},
{
"epoch": 2.01,
"eval_loss": 0.39734578132629395,
"eval_runtime": 2.1888,
"eval_samples_per_second": 14.62,
"eval_steps_per_second": 7.31,
"eval_wer_score": 3.13571117877338,
"step": 3250
},
{
"epoch": 2.04,
"learning_rate": 1.5965346534653468e-05,
"loss": 0.3896,
"step": 3300
},
{
"epoch": 2.04,
"eval_loss": 0.3978061079978943,
"eval_runtime": 2.2259,
"eval_samples_per_second": 14.376,
"eval_steps_per_second": 7.188,
"eval_wer_score": 3.110047846889952,
"step": 3300
},
{
"epoch": 2.07,
"learning_rate": 1.54496699669967e-05,
"loss": 0.3907,
"step": 3350
},
{
"epoch": 2.07,
"eval_loss": 0.3961105942726135,
"eval_runtime": 2.5092,
"eval_samples_per_second": 12.753,
"eval_steps_per_second": 6.377,
"eval_wer_score": 3.1274467159634622,
"step": 3350
},
{
"epoch": 2.1,
"learning_rate": 1.4933993399339935e-05,
"loss": 0.3889,
"step": 3400
},
{
"epoch": 2.1,
"eval_loss": 0.3963559865951538,
"eval_runtime": 2.1557,
"eval_samples_per_second": 14.845,
"eval_steps_per_second": 7.422,
"eval_wer_score": 3.190517616354937,
"step": 3400
},
{
"epoch": 2.13,
"learning_rate": 1.4418316831683168e-05,
"loss": 0.3902,
"step": 3450
},
{
"epoch": 2.13,
"eval_loss": 0.3959140479564667,
"eval_runtime": 2.1754,
"eval_samples_per_second": 14.71,
"eval_steps_per_second": 7.355,
"eval_wer_score": 3.1857329273597217,
"step": 3450
},
{
"epoch": 2.17,
"learning_rate": 1.3902640264026403e-05,
"loss": 0.3902,
"step": 3500
},
{
"epoch": 2.17,
"eval_loss": 0.3955221176147461,
"eval_runtime": 2.075,
"eval_samples_per_second": 15.422,
"eval_steps_per_second": 7.711,
"eval_wer_score": 3.162244454110483,
"step": 3500
},
{
"epoch": 2.2,
"learning_rate": 1.3386963696369636e-05,
"loss": 0.3891,
"step": 3550
},
{
"epoch": 2.2,
"eval_loss": 0.39533841609954834,
"eval_runtime": 2.1894,
"eval_samples_per_second": 14.616,
"eval_steps_per_second": 7.308,
"eval_wer_score": 3.17442366246194,
"step": 3550
},
{
"epoch": 2.23,
"learning_rate": 1.2871287128712873e-05,
"loss": 0.3886,
"step": 3600
},
{
"epoch": 2.23,
"eval_loss": 0.3946349620819092,
"eval_runtime": 2.1424,
"eval_samples_per_second": 14.936,
"eval_steps_per_second": 7.468,
"eval_wer_score": 3.142670726402784,
"step": 3600
},
{
"epoch": 2.26,
"learning_rate": 1.2355610561056106e-05,
"loss": 0.388,
"step": 3650
},
{
"epoch": 2.26,
"eval_loss": 0.3959529995918274,
"eval_runtime": 2.1922,
"eval_samples_per_second": 14.597,
"eval_steps_per_second": 7.299,
"eval_wer_score": 3.1309264897781643,
"step": 3650
},
{
"epoch": 2.29,
"learning_rate": 1.1839933993399341e-05,
"loss": 0.3888,
"step": 3700
},
{
"epoch": 2.29,
"eval_loss": 0.39452987909317017,
"eval_runtime": 2.1798,
"eval_samples_per_second": 14.68,
"eval_steps_per_second": 7.34,
"eval_wer_score": 3.13571117877338,
"step": 3700
},
{
"epoch": 2.32,
"learning_rate": 1.1324257425742574e-05,
"loss": 0.3891,
"step": 3750
},
{
"epoch": 2.32,
"eval_loss": 0.39532509446144104,
"eval_runtime": 2.0097,
"eval_samples_per_second": 15.923,
"eval_steps_per_second": 7.961,
"eval_wer_score": 3.121357111787734,
"step": 3750
},
{
"epoch": 2.35,
"learning_rate": 1.080858085808581e-05,
"loss": 0.3883,
"step": 3800
},
{
"epoch": 2.35,
"eval_loss": 0.39508694410324097,
"eval_runtime": 2.1667,
"eval_samples_per_second": 14.769,
"eval_steps_per_second": 7.385,
"eval_wer_score": 3.1361461505002173,
"step": 3800
},
{
"epoch": 2.38,
"learning_rate": 1.0292904290429044e-05,
"loss": 0.3876,
"step": 3850
},
{
"epoch": 2.38,
"eval_loss": 0.39381179213523865,
"eval_runtime": 2.1778,
"eval_samples_per_second": 14.693,
"eval_steps_per_second": 7.347,
"eval_wer_score": 3.1309264897781643,
"step": 3850
},
{
"epoch": 2.41,
"learning_rate": 9.777227722772278e-06,
"loss": 0.3879,
"step": 3900
},
{
"epoch": 2.41,
"eval_loss": 0.39416271448135376,
"eval_runtime": 2.2392,
"eval_samples_per_second": 14.291,
"eval_steps_per_second": 7.145,
"eval_wer_score": 3.148325358851675,
"step": 3900
},
{
"epoch": 2.44,
"learning_rate": 9.261551155115513e-06,
"loss": 0.386,
"step": 3950
},
{
"epoch": 2.44,
"eval_loss": 0.39405977725982666,
"eval_runtime": 2.2548,
"eval_samples_per_second": 14.192,
"eval_steps_per_second": 7.096,
"eval_wer_score": 3.15311004784689,
"step": 3950
},
{
"epoch": 2.48,
"learning_rate": 8.745874587458746e-06,
"loss": 0.3862,
"step": 4000
},
{
"epoch": 2.48,
"eval_loss": 0.3948515057563782,
"eval_runtime": 2.2104,
"eval_samples_per_second": 14.477,
"eval_steps_per_second": 7.239,
"eval_wer_score": 3.1378860374075686,
"step": 4000
},
{
"epoch": 2.51,
"learning_rate": 8.230198019801981e-06,
"loss": 0.3876,
"step": 4050
},
{
"epoch": 2.51,
"eval_loss": 0.3954794704914093,
"eval_runtime": 2.1178,
"eval_samples_per_second": 15.11,
"eval_steps_per_second": 7.555,
"eval_wer_score": 3.1470204436711615,
"step": 4050
},
{
"epoch": 2.54,
"learning_rate": 7.714521452145216e-06,
"loss": 0.3876,
"step": 4100
},
{
"epoch": 2.54,
"eval_loss": 0.3942318856716156,
"eval_runtime": 2.1511,
"eval_samples_per_second": 14.876,
"eval_steps_per_second": 7.438,
"eval_wer_score": 3.1326663766855156,
"step": 4100
},
{
"epoch": 2.57,
"learning_rate": 7.198844884488449e-06,
"loss": 0.3858,
"step": 4150
},
{
"epoch": 2.57,
"eval_loss": 0.39369016885757446,
"eval_runtime": 2.2193,
"eval_samples_per_second": 14.419,
"eval_steps_per_second": 7.21,
"eval_wer_score": 3.1270117442366248,
"step": 4150
},
{
"epoch": 2.6,
"learning_rate": 6.6831683168316835e-06,
"loss": 0.3855,
"step": 4200
},
{
"epoch": 2.6,
"eval_loss": 0.3940153121948242,
"eval_runtime": 2.2424,
"eval_samples_per_second": 14.27,
"eval_steps_per_second": 7.135,
"eval_wer_score": 3.1491953023053503,
"step": 4200
},
{
"epoch": 2.63,
"learning_rate": 6.167491749174918e-06,
"loss": 0.3864,
"step": 4250
},
{
"epoch": 2.63,
"eval_loss": 0.3938477337360382,
"eval_runtime": 2.1981,
"eval_samples_per_second": 14.558,
"eval_steps_per_second": 7.279,
"eval_wer_score": 3.1431056981296215,
"step": 4250
},
{
"epoch": 2.66,
"learning_rate": 5.651815181518152e-06,
"loss": 0.3869,
"step": 4300
},
{
"epoch": 2.66,
"eval_loss": 0.3936881422996521,
"eval_runtime": 2.2164,
"eval_samples_per_second": 14.438,
"eval_steps_per_second": 7.219,
"eval_wer_score": 3.1583297085689432,
"step": 4300
},
{
"epoch": 2.69,
"learning_rate": 5.136138613861386e-06,
"loss": 0.3841,
"step": 4350
},
{
"epoch": 2.69,
"eval_loss": 0.39349794387817383,
"eval_runtime": 2.2175,
"eval_samples_per_second": 14.43,
"eval_steps_per_second": 7.215,
"eval_wer_score": 3.1278816876903,
"step": 4350
},
{
"epoch": 2.72,
"learning_rate": 4.62046204620462e-06,
"loss": 0.3866,
"step": 4400
},
{
"epoch": 2.72,
"eval_loss": 0.3936805725097656,
"eval_runtime": 1.9094,
"eval_samples_per_second": 16.759,
"eval_steps_per_second": 8.38,
"eval_wer_score": 3.119182253153545,
"step": 4400
},
{
"epoch": 2.75,
"learning_rate": 4.104785478547855e-06,
"loss": 0.3848,
"step": 4450
},
{
"epoch": 2.75,
"eval_loss": 0.3931500315666199,
"eval_runtime": 2.1055,
"eval_samples_per_second": 15.198,
"eval_steps_per_second": 7.599,
"eval_wer_score": 3.1235319704219227,
"step": 4450
},
{
"epoch": 2.78,
"learning_rate": 3.589108910891089e-06,
"loss": 0.3849,
"step": 4500
},
{
"epoch": 2.78,
"eval_loss": 0.39277058839797974,
"eval_runtime": 2.1028,
"eval_samples_per_second": 15.218,
"eval_steps_per_second": 7.609,
"eval_wer_score": 3.1313614615050023,
"step": 4500
},
{
"epoch": 2.82,
"learning_rate": 3.073432343234324e-06,
"loss": 0.383,
"step": 4550
},
{
"epoch": 2.82,
"eval_loss": 0.3926939368247986,
"eval_runtime": 2.1315,
"eval_samples_per_second": 15.013,
"eval_steps_per_second": 7.506,
"eval_wer_score": 3.1230969986950847,
"step": 4550
},
{
"epoch": 2.85,
"learning_rate": 2.557755775577558e-06,
"loss": 0.3839,
"step": 4600
},
{
"epoch": 2.85,
"eval_loss": 0.39277368783950806,
"eval_runtime": 2.1656,
"eval_samples_per_second": 14.776,
"eval_steps_per_second": 7.388,
"eval_wer_score": 3.115702479338843,
"step": 4600
},
{
"epoch": 2.88,
"learning_rate": 2.042079207920792e-06,
"loss": 0.3843,
"step": 4650
},
{
"epoch": 2.88,
"eval_loss": 0.3926578164100647,
"eval_runtime": 2.2527,
"eval_samples_per_second": 14.205,
"eval_steps_per_second": 7.103,
"eval_wer_score": 3.1226620269682472,
"step": 4650
},
{
"epoch": 2.91,
"learning_rate": 1.5264026402640265e-06,
"loss": 0.3862,
"step": 4700
},
{
"epoch": 2.91,
"eval_loss": 0.3923312723636627,
"eval_runtime": 2.1236,
"eval_samples_per_second": 15.069,
"eval_steps_per_second": 7.534,
"eval_wer_score": 3.1287516311439756,
"step": 4700
},
{
"epoch": 2.94,
"learning_rate": 1.0107260726072606e-06,
"loss": 0.3848,
"step": 4750
},
{
"epoch": 2.94,
"eval_loss": 0.3923192024230957,
"eval_runtime": 2.1085,
"eval_samples_per_second": 15.177,
"eval_steps_per_second": 7.588,
"eval_wer_score": 3.1448455850369728,
"step": 4750
},
{
"epoch": 2.97,
"learning_rate": 4.950495049504951e-07,
"loss": 0.3856,
"step": 4800
},
{
"epoch": 2.97,
"eval_loss": 0.3922309875488281,
"eval_runtime": 2.9258,
"eval_samples_per_second": 10.937,
"eval_steps_per_second": 5.469,
"eval_wer_score": 3.133536320139191,
"step": 4800
}
],
"max_steps": 4848,
"num_train_epochs": 3,
"total_flos": 5.746828131664773e+17,
"trial_name": null,
"trial_params": null
}