class_code_style_transformer / trainer_state.json
codestylist's picture
Upload trainer_state.json
9b5f408
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9982255399865387,
"global_step": 49000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 0.001979603907891248,
"loss": 0.0528,
"step": 500
},
{
"epoch": 0.06,
"learning_rate": 0.0019592078157824964,
"loss": 0.0543,
"step": 1000
},
{
"epoch": 0.09,
"learning_rate": 0.0019388117236737441,
"loss": 0.0531,
"step": 1500
},
{
"epoch": 0.12,
"learning_rate": 0.001918415631564992,
"loss": 0.0534,
"step": 2000
},
{
"epoch": 0.15,
"learning_rate": 0.0018980195394562402,
"loss": 0.0528,
"step": 2500
},
{
"epoch": 0.18,
"learning_rate": 0.0018776234473474882,
"loss": 0.052,
"step": 3000
},
{
"epoch": 0.21,
"learning_rate": 0.0018572273552387361,
"loss": 0.0518,
"step": 3500
},
{
"epoch": 0.24,
"learning_rate": 0.0018368312631299843,
"loss": 0.0509,
"step": 4000
},
{
"epoch": 0.28,
"learning_rate": 0.0018164351710212323,
"loss": 0.0507,
"step": 4500
},
{
"epoch": 0.31,
"learning_rate": 0.0017960390789124802,
"loss": 0.049,
"step": 5000
},
{
"epoch": 0.34,
"learning_rate": 0.0017756429868037286,
"loss": 0.0487,
"step": 5500
},
{
"epoch": 0.37,
"learning_rate": 0.0017552468946949765,
"loss": 0.0484,
"step": 6000
},
{
"epoch": 0.4,
"learning_rate": 0.0017348508025862245,
"loss": 0.0498,
"step": 6500
},
{
"epoch": 0.43,
"learning_rate": 0.0017144547104774727,
"loss": 0.0507,
"step": 7000
},
{
"epoch": 0.46,
"learning_rate": 0.0016940586183687206,
"loss": 0.0467,
"step": 7500
},
{
"epoch": 0.49,
"learning_rate": 0.0016736625262599688,
"loss": 0.0469,
"step": 8000
},
{
"epoch": 0.52,
"learning_rate": 0.0016532664341512167,
"loss": 0.0473,
"step": 8500
},
{
"epoch": 0.55,
"learning_rate": 0.0016328703420424647,
"loss": 0.047,
"step": 9000
},
{
"epoch": 0.58,
"learning_rate": 0.0016124742499337129,
"loss": 0.0465,
"step": 9500
},
{
"epoch": 0.61,
"learning_rate": 0.0015920781578249608,
"loss": 0.0461,
"step": 10000
},
{
"epoch": 0.64,
"learning_rate": 0.0015716820657162088,
"loss": 0.045,
"step": 10500
},
{
"epoch": 0.67,
"learning_rate": 0.001551285973607457,
"loss": 0.0468,
"step": 11000
},
{
"epoch": 0.7,
"learning_rate": 0.0015308898814987049,
"loss": 0.0445,
"step": 11500
},
{
"epoch": 0.73,
"learning_rate": 0.0015104937893899528,
"loss": 0.0443,
"step": 12000
},
{
"epoch": 0.76,
"learning_rate": 0.001490097697281201,
"loss": 0.0439,
"step": 12500
},
{
"epoch": 0.8,
"learning_rate": 0.001469701605172449,
"loss": 0.0437,
"step": 13000
},
{
"epoch": 0.83,
"learning_rate": 0.001449305513063697,
"loss": 0.0432,
"step": 13500
},
{
"epoch": 0.86,
"learning_rate": 0.001428909420954945,
"loss": 0.0434,
"step": 14000
},
{
"epoch": 0.89,
"learning_rate": 0.001408513328846193,
"loss": 0.044,
"step": 14500
},
{
"epoch": 0.92,
"learning_rate": 0.001388117236737441,
"loss": 0.0433,
"step": 15000
},
{
"epoch": 0.95,
"learning_rate": 0.0013677211446286891,
"loss": 0.0427,
"step": 15500
},
{
"epoch": 0.98,
"learning_rate": 0.001347325052519937,
"loss": 0.043,
"step": 16000
},
{
"epoch": 1.0,
"eval_loss": 0.03870956972241402,
"eval_runtime": 667.1114,
"eval_samples_per_second": 146.987,
"eval_steps_per_second": 6.125,
"step": 16343
},
{
"epoch": 1.01,
"learning_rate": 0.0013269289604111853,
"loss": 0.0404,
"step": 16500
},
{
"epoch": 1.04,
"learning_rate": 0.0013065328683024334,
"loss": 0.037,
"step": 17000
},
{
"epoch": 1.07,
"learning_rate": 0.0012861367761936814,
"loss": 0.0371,
"step": 17500
},
{
"epoch": 1.1,
"learning_rate": 0.0012657406840849293,
"loss": 0.0372,
"step": 18000
},
{
"epoch": 1.13,
"learning_rate": 0.0012453445919761775,
"loss": 0.0386,
"step": 18500
},
{
"epoch": 1.16,
"learning_rate": 0.0012249484998674255,
"loss": 0.0377,
"step": 19000
},
{
"epoch": 1.19,
"learning_rate": 0.0012045524077586734,
"loss": 0.0385,
"step": 19500
},
{
"epoch": 1.22,
"learning_rate": 0.0011841563156499216,
"loss": 0.0377,
"step": 20000
},
{
"epoch": 1.25,
"learning_rate": 0.0011637602235411695,
"loss": 0.0378,
"step": 20500
},
{
"epoch": 1.28,
"learning_rate": 0.0011433641314324175,
"loss": 0.0364,
"step": 21000
},
{
"epoch": 1.32,
"learning_rate": 0.0011229680393236656,
"loss": 0.0372,
"step": 21500
},
{
"epoch": 1.35,
"learning_rate": 0.0011025719472149136,
"loss": 0.0372,
"step": 22000
},
{
"epoch": 1.38,
"learning_rate": 0.0010821758551061618,
"loss": 0.0387,
"step": 22500
},
{
"epoch": 1.41,
"learning_rate": 0.0010617797629974097,
"loss": 0.0387,
"step": 23000
},
{
"epoch": 1.44,
"learning_rate": 0.0010413836708886577,
"loss": 0.0387,
"step": 23500
},
{
"epoch": 1.47,
"learning_rate": 0.0010209875787799058,
"loss": 0.0363,
"step": 24000
},
{
"epoch": 1.5,
"learning_rate": 0.0010005914866711538,
"loss": 0.0385,
"step": 24500
},
{
"epoch": 1.53,
"learning_rate": 0.0009801953945624017,
"loss": 0.0387,
"step": 25000
},
{
"epoch": 1.56,
"learning_rate": 0.0009597993024536499,
"loss": 0.0381,
"step": 25500
},
{
"epoch": 1.59,
"learning_rate": 0.000939403210344898,
"loss": 0.039,
"step": 26000
},
{
"epoch": 1.62,
"learning_rate": 0.000919007118236146,
"loss": 0.0369,
"step": 26500
},
{
"epoch": 1.65,
"learning_rate": 0.000898611026127394,
"loss": 0.0372,
"step": 27000
},
{
"epoch": 1.68,
"learning_rate": 0.000878214934018642,
"loss": 0.0379,
"step": 27500
},
{
"epoch": 1.71,
"learning_rate": 0.0008578188419098901,
"loss": 0.0367,
"step": 28000
},
{
"epoch": 1.74,
"learning_rate": 0.0008374227498011381,
"loss": 0.0361,
"step": 28500
},
{
"epoch": 1.77,
"learning_rate": 0.0008170266576923861,
"loss": 0.0364,
"step": 29000
},
{
"epoch": 1.81,
"learning_rate": 0.0007966305655836342,
"loss": 0.0361,
"step": 29500
},
{
"epoch": 1.84,
"learning_rate": 0.0007762344734748822,
"loss": 0.0359,
"step": 30000
},
{
"epoch": 1.87,
"learning_rate": 0.0007558383813661303,
"loss": 0.0357,
"step": 30500
},
{
"epoch": 1.9,
"learning_rate": 0.0007354422892573784,
"loss": 0.0358,
"step": 31000
},
{
"epoch": 1.93,
"learning_rate": 0.0007150461971486263,
"loss": 0.0357,
"step": 31500
},
{
"epoch": 1.96,
"learning_rate": 0.0006946501050398744,
"loss": 0.0352,
"step": 32000
},
{
"epoch": 1.99,
"learning_rate": 0.0006742540129311224,
"loss": 0.0351,
"step": 32500
},
{
"epoch": 2.0,
"eval_loss": 0.03284740820527077,
"eval_runtime": 666.0258,
"eval_samples_per_second": 147.227,
"eval_steps_per_second": 6.135,
"step": 32686
},
{
"epoch": 2.02,
"learning_rate": 0.0006538579208223705,
"loss": 0.0325,
"step": 33000
},
{
"epoch": 2.05,
"learning_rate": 0.0006334618287136184,
"loss": 0.0314,
"step": 33500
},
{
"epoch": 2.08,
"learning_rate": 0.0006130657366048665,
"loss": 0.0308,
"step": 34000
},
{
"epoch": 2.11,
"learning_rate": 0.0005926696444961146,
"loss": 0.0315,
"step": 34500
},
{
"epoch": 2.14,
"learning_rate": 0.0005722735523873625,
"loss": 0.0308,
"step": 35000
},
{
"epoch": 2.17,
"learning_rate": 0.0005518774602786107,
"loss": 0.0306,
"step": 35500
},
{
"epoch": 2.2,
"learning_rate": 0.0005314813681698587,
"loss": 0.0309,
"step": 36000
},
{
"epoch": 2.23,
"learning_rate": 0.0005110852760611067,
"loss": 0.0304,
"step": 36500
},
{
"epoch": 2.26,
"learning_rate": 0.0004906891839523547,
"loss": 0.0306,
"step": 37000
},
{
"epoch": 2.29,
"learning_rate": 0.00047029309184360276,
"loss": 0.0307,
"step": 37500
},
{
"epoch": 2.33,
"learning_rate": 0.0004498969997348508,
"loss": 0.0296,
"step": 38000
},
{
"epoch": 2.36,
"learning_rate": 0.0004295009076260988,
"loss": 0.0302,
"step": 38500
},
{
"epoch": 2.39,
"learning_rate": 0.0004091048155173469,
"loss": 0.0299,
"step": 39000
},
{
"epoch": 2.42,
"learning_rate": 0.00038870872340859494,
"loss": 0.0297,
"step": 39500
},
{
"epoch": 2.45,
"learning_rate": 0.00036831263129984295,
"loss": 0.0292,
"step": 40000
},
{
"epoch": 2.48,
"learning_rate": 0.000347916539191091,
"loss": 0.0302,
"step": 40500
},
{
"epoch": 2.51,
"learning_rate": 0.000327520447082339,
"loss": 0.0295,
"step": 41000
},
{
"epoch": 2.54,
"learning_rate": 0.0003071243549735871,
"loss": 0.0295,
"step": 41500
},
{
"epoch": 2.57,
"learning_rate": 0.00028672826286483513,
"loss": 0.0289,
"step": 42000
},
{
"epoch": 2.6,
"learning_rate": 0.00026633217075608314,
"loss": 0.0297,
"step": 42500
},
{
"epoch": 2.63,
"learning_rate": 0.00024593607864733115,
"loss": 0.0291,
"step": 43000
},
{
"epoch": 2.66,
"learning_rate": 0.0002255399865385792,
"loss": 0.0288,
"step": 43500
},
{
"epoch": 2.69,
"learning_rate": 0.00020514389442982727,
"loss": 0.0278,
"step": 44000
},
{
"epoch": 2.72,
"learning_rate": 0.0001847478023210753,
"loss": 0.0284,
"step": 44500
},
{
"epoch": 2.75,
"learning_rate": 0.0001643517102123233,
"loss": 0.0285,
"step": 45000
},
{
"epoch": 2.78,
"learning_rate": 0.00014395561810357136,
"loss": 0.0278,
"step": 45500
},
{
"epoch": 2.81,
"learning_rate": 0.0001235595259948194,
"loss": 0.0286,
"step": 46000
},
{
"epoch": 2.85,
"learning_rate": 0.00010316343388606743,
"loss": 0.028,
"step": 46500
},
{
"epoch": 2.88,
"learning_rate": 8.276734177731548e-05,
"loss": 0.0279,
"step": 47000
},
{
"epoch": 2.91,
"learning_rate": 6.23712496685635e-05,
"loss": 0.0286,
"step": 47500
},
{
"epoch": 2.94,
"learning_rate": 4.197515755981154e-05,
"loss": 0.0278,
"step": 48000
},
{
"epoch": 2.97,
"learning_rate": 2.157906545105958e-05,
"loss": 0.0279,
"step": 48500
},
{
"epoch": 3.0,
"learning_rate": 1.1829733423076138e-06,
"loss": 0.0277,
"step": 49000
}
],
"max_steps": 49029,
"num_train_epochs": 3,
"total_flos": 1.591600637458514e+17,
"trial_name": null,
"trial_params": null
}