Joeran Bosma
Initial release
2739a45
raw
history blame
33.7 kB
{
"best_metric": 0.7846992611885071,
"best_model_checkpoint": "/output/zgt-roberta-large-finetuned-Gu21schedule-BS256-10ep/checkpoint-31926",
"epoch": 9.99912662907937,
"global_step": 32200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1,
"eval_accuracy": 0.12095168799057326,
"eval_loss": 6.969939231872559,
"eval_runtime": 4175.606,
"eval_samples_per_second": 24.712,
"eval_steps_per_second": 3.089,
"step": 313
},
{
"epoch": 0.16,
"learning_rate": 1.5527950310559007e-05,
"loss": 7.8141,
"step": 500
},
{
"epoch": 0.19,
"eval_accuracy": 0.1524057089518688,
"eval_loss": 6.233978271484375,
"eval_runtime": 4177.4061,
"eval_samples_per_second": 24.701,
"eval_steps_per_second": 3.088,
"step": 626
},
{
"epoch": 0.29,
"eval_accuracy": 0.16394060788435189,
"eval_loss": 6.122518539428711,
"eval_runtime": 4179.7801,
"eval_samples_per_second": 24.687,
"eval_steps_per_second": 3.086,
"step": 939
},
{
"epoch": 0.31,
"learning_rate": 3.1055900621118014e-05,
"loss": 6.1501,
"step": 1000
},
{
"epoch": 0.39,
"eval_accuracy": 0.16842972332423103,
"eval_loss": 6.045305252075195,
"eval_runtime": 4182.1677,
"eval_samples_per_second": 24.673,
"eval_steps_per_second": 3.084,
"step": 1252
},
{
"epoch": 0.47,
"learning_rate": 4.658385093167702e-05,
"loss": 6.0737,
"step": 1500
},
{
"epoch": 0.49,
"eval_accuracy": 0.16725142510331475,
"eval_loss": 5.987020969390869,
"eval_runtime": 4177.226,
"eval_samples_per_second": 24.702,
"eval_steps_per_second": 3.088,
"step": 1565
},
{
"epoch": 0.58,
"eval_accuracy": 0.17195286451581493,
"eval_loss": 5.9314141273498535,
"eval_runtime": 4165.2923,
"eval_samples_per_second": 24.773,
"eval_steps_per_second": 3.097,
"step": 1878
},
{
"epoch": 0.62,
"learning_rate": 6.211180124223603e-05,
"loss": 5.9753,
"step": 2000
},
{
"epoch": 0.68,
"eval_accuracy": 0.17336381929439706,
"eval_loss": 5.887312412261963,
"eval_runtime": 4179.8742,
"eval_samples_per_second": 24.687,
"eval_steps_per_second": 3.086,
"step": 2191
},
{
"epoch": 0.78,
"learning_rate": 7.763975155279503e-05,
"loss": 5.9008,
"step": 2500
},
{
"epoch": 0.78,
"eval_accuracy": 0.1737213537890283,
"eval_loss": 5.851128101348877,
"eval_runtime": 4181.0014,
"eval_samples_per_second": 24.68,
"eval_steps_per_second": 3.085,
"step": 2504
},
{
"epoch": 0.87,
"eval_accuracy": 0.1730111369884377,
"eval_loss": 5.819331645965576,
"eval_runtime": 4182.8162,
"eval_samples_per_second": 24.669,
"eval_steps_per_second": 3.084,
"step": 2817
},
{
"epoch": 0.93,
"learning_rate": 9.316770186335404e-05,
"loss": 5.8379,
"step": 3000
},
{
"epoch": 0.97,
"eval_accuracy": 0.17577215201939525,
"eval_loss": 5.785292625427246,
"eval_runtime": 4184.6612,
"eval_samples_per_second": 24.658,
"eval_steps_per_second": 3.082,
"step": 3130
},
{
"epoch": 1.07,
"eval_accuracy": 0.1759501659892642,
"eval_loss": 5.762609481811523,
"eval_runtime": 4182.8465,
"eval_samples_per_second": 24.669,
"eval_steps_per_second": 3.084,
"step": 3443
},
{
"epoch": 1.09,
"learning_rate": 9.903381642512077e-05,
"loss": 5.7885,
"step": 3500
},
{
"epoch": 1.17,
"eval_accuracy": 0.17622262356212584,
"eval_loss": 5.739773273468018,
"eval_runtime": 4170.2068,
"eval_samples_per_second": 24.744,
"eval_steps_per_second": 3.093,
"step": 3756
},
{
"epoch": 1.24,
"learning_rate": 9.730848861283644e-05,
"loss": 5.7464,
"step": 4000
},
{
"epoch": 1.26,
"eval_accuracy": 0.1766708743216783,
"eval_loss": 5.720291614532471,
"eval_runtime": 4172.2987,
"eval_samples_per_second": 24.731,
"eval_steps_per_second": 3.092,
"step": 4069
},
{
"epoch": 1.36,
"eval_accuracy": 0.17642257605364067,
"eval_loss": 5.696093559265137,
"eval_runtime": 4172.9563,
"eval_samples_per_second": 24.728,
"eval_steps_per_second": 3.091,
"step": 4382
},
{
"epoch": 1.4,
"learning_rate": 9.558316080055211e-05,
"loss": 5.7149,
"step": 4500
},
{
"epoch": 1.46,
"eval_accuracy": 0.17737878845804067,
"eval_loss": 5.683297634124756,
"eval_runtime": 4185.0558,
"eval_samples_per_second": 24.656,
"eval_steps_per_second": 3.082,
"step": 4695
},
{
"epoch": 1.55,
"learning_rate": 9.385783298826778e-05,
"loss": 5.6885,
"step": 5000
},
{
"epoch": 1.56,
"eval_accuracy": 0.1773234355905989,
"eval_loss": 5.670944690704346,
"eval_runtime": 4184.0795,
"eval_samples_per_second": 24.662,
"eval_steps_per_second": 3.083,
"step": 5008
},
{
"epoch": 1.65,
"eval_accuracy": 0.1774909217337557,
"eval_loss": 5.6566619873046875,
"eval_runtime": 4185.5299,
"eval_samples_per_second": 24.653,
"eval_steps_per_second": 3.082,
"step": 5321
},
{
"epoch": 1.71,
"learning_rate": 9.213250517598345e-05,
"loss": 5.666,
"step": 5500
},
{
"epoch": 1.75,
"eval_accuracy": 0.17712345558640866,
"eval_loss": 5.647720813751221,
"eval_runtime": 4178.6309,
"eval_samples_per_second": 24.694,
"eval_steps_per_second": 3.087,
"step": 5634
},
{
"epoch": 1.85,
"eval_accuracy": 0.1778983102903151,
"eval_loss": 5.632648944854736,
"eval_runtime": 4179.5279,
"eval_samples_per_second": 24.689,
"eval_steps_per_second": 3.086,
"step": 5947
},
{
"epoch": 1.86,
"learning_rate": 9.04071773636991e-05,
"loss": 5.6458,
"step": 6000
},
{
"epoch": 1.94,
"eval_accuracy": 0.1767171879623295,
"eval_loss": 5.626438140869141,
"eval_runtime": 4166.358,
"eval_samples_per_second": 24.767,
"eval_steps_per_second": 3.096,
"step": 6260
},
{
"epoch": 2.02,
"learning_rate": 8.868184955141477e-05,
"loss": 5.6293,
"step": 6500
},
{
"epoch": 2.04,
"eval_accuracy": 0.18033812690505616,
"eval_loss": 5.371770858764648,
"eval_runtime": 4187.0608,
"eval_samples_per_second": 24.644,
"eval_steps_per_second": 3.081,
"step": 6573
},
{
"epoch": 2.14,
"eval_accuracy": 0.22889979190190532,
"eval_loss": 4.608296871185303,
"eval_runtime": 4185.2213,
"eval_samples_per_second": 24.655,
"eval_steps_per_second": 3.082,
"step": 6886
},
{
"epoch": 2.17,
"learning_rate": 8.695652173913044e-05,
"loss": 4.9883,
"step": 7000
},
{
"epoch": 2.24,
"eval_accuracy": 0.4614915191453084,
"eval_loss": 3.1735970973968506,
"eval_runtime": 4184.0355,
"eval_samples_per_second": 24.662,
"eval_steps_per_second": 3.083,
"step": 7199
},
{
"epoch": 2.33,
"learning_rate": 8.523119392684611e-05,
"loss": 3.2514,
"step": 7500
},
{
"epoch": 2.33,
"eval_accuracy": 0.5874539351791631,
"eval_loss": 2.3033511638641357,
"eval_runtime": 4180.5231,
"eval_samples_per_second": 24.683,
"eval_steps_per_second": 3.085,
"step": 7512
},
{
"epoch": 2.43,
"eval_accuracy": 0.6407011451986726,
"eval_loss": 1.915069818496704,
"eval_runtime": 4169.2257,
"eval_samples_per_second": 24.75,
"eval_steps_per_second": 3.094,
"step": 7825
},
{
"epoch": 2.48,
"learning_rate": 8.350586611456177e-05,
"loss": 2.1211,
"step": 8000
},
{
"epoch": 2.53,
"eval_accuracy": 0.6671589447070213,
"eval_loss": 1.7218379974365234,
"eval_runtime": 4169.0733,
"eval_samples_per_second": 24.751,
"eval_steps_per_second": 3.094,
"step": 8138
},
{
"epoch": 2.62,
"eval_accuracy": 0.6849309492119486,
"eval_loss": 1.5953431129455566,
"eval_runtime": 4186.0587,
"eval_samples_per_second": 24.65,
"eval_steps_per_second": 3.081,
"step": 8451
},
{
"epoch": 2.64,
"learning_rate": 8.178053830227743e-05,
"loss": 1.7698,
"step": 8500
},
{
"epoch": 2.72,
"eval_accuracy": 0.6977890826334601,
"eval_loss": 1.5040490627288818,
"eval_runtime": 4187.545,
"eval_samples_per_second": 24.641,
"eval_steps_per_second": 3.08,
"step": 8764
},
{
"epoch": 2.79,
"learning_rate": 8.00552104899931e-05,
"loss": 1.5907,
"step": 9000
},
{
"epoch": 2.82,
"eval_accuracy": 0.7089186346832345,
"eval_loss": 1.4300199747085571,
"eval_runtime": 4186.4554,
"eval_samples_per_second": 24.648,
"eval_steps_per_second": 3.081,
"step": 9077
},
{
"epoch": 2.92,
"eval_accuracy": 0.716823811317197,
"eval_loss": 1.3781534433364868,
"eval_runtime": 4180.8901,
"eval_samples_per_second": 24.681,
"eval_steps_per_second": 3.085,
"step": 9390
},
{
"epoch": 2.95,
"learning_rate": 7.832988267770877e-05,
"loss": 1.4757,
"step": 9500
},
{
"epoch": 3.01,
"eval_accuracy": 0.7239590766169688,
"eval_loss": 1.3298745155334473,
"eval_runtime": 4169.3417,
"eval_samples_per_second": 24.749,
"eval_steps_per_second": 3.094,
"step": 9703
},
{
"epoch": 3.11,
"learning_rate": 7.660455486542444e-05,
"loss": 1.3919,
"step": 10000
},
{
"epoch": 3.11,
"eval_accuracy": 0.7302072364465025,
"eval_loss": 1.2895771265029907,
"eval_runtime": 4181.4693,
"eval_samples_per_second": 24.677,
"eval_steps_per_second": 3.085,
"step": 10016
},
{
"epoch": 3.21,
"eval_accuracy": 0.7356055265010495,
"eval_loss": 1.25456964969635,
"eval_runtime": 4169.8649,
"eval_samples_per_second": 24.746,
"eval_steps_per_second": 3.093,
"step": 10329
},
{
"epoch": 3.26,
"learning_rate": 7.48792270531401e-05,
"loss": 1.328,
"step": 10500
},
{
"epoch": 3.3,
"eval_accuracy": 0.7406319445625766,
"eval_loss": 1.2240813970565796,
"eval_runtime": 4170.5091,
"eval_samples_per_second": 24.742,
"eval_steps_per_second": 3.093,
"step": 10642
},
{
"epoch": 3.4,
"eval_accuracy": 0.745277920243707,
"eval_loss": 1.1944962739944458,
"eval_runtime": 4174.7942,
"eval_samples_per_second": 24.717,
"eval_steps_per_second": 3.09,
"step": 10955
},
{
"epoch": 3.42,
"learning_rate": 7.315389924085577e-05,
"loss": 1.2782,
"step": 11000
},
{
"epoch": 3.5,
"eval_accuracy": 0.7494278825432492,
"eval_loss": 1.1713906526565552,
"eval_runtime": 4174.964,
"eval_samples_per_second": 24.716,
"eval_steps_per_second": 3.09,
"step": 11268
},
{
"epoch": 3.57,
"learning_rate": 7.142857142857143e-05,
"loss": 1.2357,
"step": 11500
},
{
"epoch": 3.6,
"eval_accuracy": 0.7529051534363226,
"eval_loss": 1.1493370532989502,
"eval_runtime": 4169.6928,
"eval_samples_per_second": 24.747,
"eval_steps_per_second": 3.094,
"step": 11581
},
{
"epoch": 3.69,
"eval_accuracy": 0.7561643316896146,
"eval_loss": 1.1291333436965942,
"eval_runtime": 4170.8848,
"eval_samples_per_second": 24.74,
"eval_steps_per_second": 3.093,
"step": 11894
},
{
"epoch": 3.73,
"learning_rate": 6.970324361628709e-05,
"loss": 1.1986,
"step": 12000
},
{
"epoch": 3.79,
"eval_accuracy": 0.7596194578598549,
"eval_loss": 1.1110583543777466,
"eval_runtime": 4174.4741,
"eval_samples_per_second": 24.719,
"eval_steps_per_second": 3.09,
"step": 12207
},
{
"epoch": 3.88,
"learning_rate": 6.797791580400277e-05,
"loss": 1.1673,
"step": 12500
},
{
"epoch": 3.89,
"eval_accuracy": 0.7632696453538697,
"eval_loss": 1.091315507888794,
"eval_runtime": 4175.4221,
"eval_samples_per_second": 24.713,
"eval_steps_per_second": 3.089,
"step": 12520
},
{
"epoch": 3.99,
"eval_accuracy": 0.7654154836189226,
"eval_loss": 1.0766664743423462,
"eval_runtime": 4173.9192,
"eval_samples_per_second": 24.722,
"eval_steps_per_second": 3.09,
"step": 12833
},
{
"epoch": 4.04,
"learning_rate": 6.625258799171843e-05,
"loss": 1.1387,
"step": 13000
},
{
"epoch": 4.08,
"eval_accuracy": 0.7681167597878354,
"eval_loss": 1.0629887580871582,
"eval_runtime": 4175.5718,
"eval_samples_per_second": 24.712,
"eval_steps_per_second": 3.089,
"step": 13146
},
{
"epoch": 4.18,
"eval_accuracy": 0.7707155612474973,
"eval_loss": 1.046230673789978,
"eval_runtime": 4175.7461,
"eval_samples_per_second": 24.711,
"eval_steps_per_second": 3.089,
"step": 13459
},
{
"epoch": 4.19,
"learning_rate": 6.45272601794341e-05,
"loss": 1.1074,
"step": 13500
},
{
"epoch": 4.28,
"eval_accuracy": 0.7731418474891986,
"eval_loss": 1.0337274074554443,
"eval_runtime": 4174.5,
"eval_samples_per_second": 24.718,
"eval_steps_per_second": 3.09,
"step": 13772
},
{
"epoch": 4.35,
"learning_rate": 6.280193236714976e-05,
"loss": 1.0893,
"step": 14000
},
{
"epoch": 4.37,
"eval_accuracy": 0.7748741121676294,
"eval_loss": 1.0239219665527344,
"eval_runtime": 4175.3624,
"eval_samples_per_second": 24.713,
"eval_steps_per_second": 3.089,
"step": 14085
},
{
"epoch": 4.47,
"eval_accuracy": 0.7766543779252701,
"eval_loss": 1.014146089553833,
"eval_runtime": 4181.6475,
"eval_samples_per_second": 24.676,
"eval_steps_per_second": 3.085,
"step": 14398
},
{
"epoch": 4.5,
"learning_rate": 6.107660455486542e-05,
"loss": 1.0682,
"step": 14500
},
{
"epoch": 4.57,
"eval_accuracy": 0.7785186060053325,
"eval_loss": 1.0032246112823486,
"eval_runtime": 4180.7422,
"eval_samples_per_second": 24.682,
"eval_steps_per_second": 3.085,
"step": 14711
},
{
"epoch": 4.66,
"learning_rate": 5.9351276742581096e-05,
"loss": 1.0524,
"step": 15000
},
{
"epoch": 4.67,
"eval_accuracy": 0.7806327231861181,
"eval_loss": 0.9907068014144897,
"eval_runtime": 4180.8503,
"eval_samples_per_second": 24.681,
"eval_steps_per_second": 3.085,
"step": 15024
},
{
"epoch": 4.76,
"eval_accuracy": 0.7826726152932884,
"eval_loss": 0.9796966910362244,
"eval_runtime": 4176.9966,
"eval_samples_per_second": 24.704,
"eval_steps_per_second": 3.088,
"step": 15337
},
{
"epoch": 4.81,
"learning_rate": 5.762594893029676e-05,
"loss": 1.0338,
"step": 15500
},
{
"epoch": 4.86,
"eval_accuracy": 0.7838565333062765,
"eval_loss": 0.9712271690368652,
"eval_runtime": 4181.1547,
"eval_samples_per_second": 24.679,
"eval_steps_per_second": 3.085,
"step": 15650
},
{
"epoch": 4.96,
"eval_accuracy": 0.7854880736072989,
"eval_loss": 0.9626355767250061,
"eval_runtime": 4176.3487,
"eval_samples_per_second": 24.707,
"eval_steps_per_second": 3.089,
"step": 15963
},
{
"epoch": 4.97,
"learning_rate": 5.590062111801242e-05,
"loss": 1.0188,
"step": 16000
},
{
"epoch": 5.05,
"eval_accuracy": 0.7867994298371893,
"eval_loss": 0.9558107256889343,
"eval_runtime": 4179.0241,
"eval_samples_per_second": 24.692,
"eval_steps_per_second": 3.087,
"step": 16276
},
{
"epoch": 5.12,
"learning_rate": 5.417529330572809e-05,
"loss": 1.003,
"step": 16500
},
{
"epoch": 5.15,
"eval_accuracy": 0.7884956846420655,
"eval_loss": 0.9470182657241821,
"eval_runtime": 4176.2829,
"eval_samples_per_second": 24.708,
"eval_steps_per_second": 3.089,
"step": 16589
},
{
"epoch": 5.25,
"eval_accuracy": 0.789320227965163,
"eval_loss": 0.9419927597045898,
"eval_runtime": 4176.4362,
"eval_samples_per_second": 24.707,
"eval_steps_per_second": 3.089,
"step": 16902
},
{
"epoch": 5.28,
"learning_rate": 5.244996549344375e-05,
"loss": 0.989,
"step": 17000
},
{
"epoch": 5.35,
"eval_accuracy": 0.7911368944368907,
"eval_loss": 0.9319880604743958,
"eval_runtime": 4173.211,
"eval_samples_per_second": 24.726,
"eval_steps_per_second": 3.091,
"step": 17215
},
{
"epoch": 5.43,
"learning_rate": 5.072463768115943e-05,
"loss": 0.9786,
"step": 17500
},
{
"epoch": 5.44,
"eval_accuracy": 0.7913904624057875,
"eval_loss": 0.9292972683906555,
"eval_runtime": 4172.1427,
"eval_samples_per_second": 24.732,
"eval_steps_per_second": 3.092,
"step": 17528
},
{
"epoch": 5.54,
"eval_accuracy": 0.7932201930680667,
"eval_loss": 0.9203895330429077,
"eval_runtime": 4172.1933,
"eval_samples_per_second": 24.732,
"eval_steps_per_second": 3.092,
"step": 17841
},
{
"epoch": 5.59,
"learning_rate": 4.899930986887509e-05,
"loss": 0.9688,
"step": 18000
},
{
"epoch": 5.64,
"eval_accuracy": 0.7938784128863696,
"eval_loss": 0.9155610799789429,
"eval_runtime": 4171.4989,
"eval_samples_per_second": 24.736,
"eval_steps_per_second": 3.092,
"step": 18154
},
{
"epoch": 5.73,
"eval_accuracy": 0.7952068414816341,
"eval_loss": 0.9091127514839172,
"eval_runtime": 4177.1927,
"eval_samples_per_second": 24.702,
"eval_steps_per_second": 3.088,
"step": 18467
},
{
"epoch": 5.74,
"learning_rate": 4.727398205659075e-05,
"loss": 0.9576,
"step": 18500
},
{
"epoch": 5.83,
"eval_accuracy": 0.7959312809475213,
"eval_loss": 0.9035018682479858,
"eval_runtime": 4178.199,
"eval_samples_per_second": 24.697,
"eval_steps_per_second": 3.087,
"step": 18780
},
{
"epoch": 5.9,
"learning_rate": 4.554865424430642e-05,
"loss": 0.9489,
"step": 19000
},
{
"epoch": 5.93,
"eval_accuracy": 0.7968186229313537,
"eval_loss": 0.8974489569664001,
"eval_runtime": 4175.3753,
"eval_samples_per_second": 24.713,
"eval_steps_per_second": 3.089,
"step": 19093
},
{
"epoch": 6.03,
"eval_accuracy": 0.7980068894657456,
"eval_loss": 0.8927856087684631,
"eval_runtime": 4174.5551,
"eval_samples_per_second": 24.718,
"eval_steps_per_second": 3.09,
"step": 19406
},
{
"epoch": 6.06,
"learning_rate": 4.382332643202209e-05,
"loss": 0.9384,
"step": 19500
},
{
"epoch": 6.12,
"eval_accuracy": 0.7989207834455775,
"eval_loss": 0.8878790140151978,
"eval_runtime": 4176.0286,
"eval_samples_per_second": 24.709,
"eval_steps_per_second": 3.089,
"step": 19719
},
{
"epoch": 6.21,
"learning_rate": 4.209799861973775e-05,
"loss": 0.9292,
"step": 20000
},
{
"epoch": 6.22,
"eval_accuracy": 0.7995861067424526,
"eval_loss": 0.8841228485107422,
"eval_runtime": 4174.1874,
"eval_samples_per_second": 24.72,
"eval_steps_per_second": 3.09,
"step": 20032
},
{
"epoch": 6.32,
"eval_accuracy": 0.8005012964274263,
"eval_loss": 0.8798208236694336,
"eval_runtime": 4177.291,
"eval_samples_per_second": 24.702,
"eval_steps_per_second": 3.088,
"step": 20345
},
{
"epoch": 6.37,
"learning_rate": 4.0372670807453414e-05,
"loss": 0.921,
"step": 20500
},
{
"epoch": 6.41,
"eval_accuracy": 0.8013221542890863,
"eval_loss": 0.8738238215446472,
"eval_runtime": 4179.3089,
"eval_samples_per_second": 24.69,
"eval_steps_per_second": 3.086,
"step": 20658
},
{
"epoch": 6.51,
"eval_accuracy": 0.8020292962858104,
"eval_loss": 0.8702828288078308,
"eval_runtime": 4183.644,
"eval_samples_per_second": 24.664,
"eval_steps_per_second": 3.083,
"step": 20971
},
{
"epoch": 6.52,
"learning_rate": 3.864734299516908e-05,
"loss": 0.9142,
"step": 21000
},
{
"epoch": 6.61,
"eval_accuracy": 0.8031108659339165,
"eval_loss": 0.8653974533081055,
"eval_runtime": 4177.1307,
"eval_samples_per_second": 24.703,
"eval_steps_per_second": 3.088,
"step": 21284
},
{
"epoch": 6.68,
"learning_rate": 3.692201518288475e-05,
"loss": 0.9059,
"step": 21500
},
{
"epoch": 6.71,
"eval_accuracy": 0.803630800811672,
"eval_loss": 0.8622693419456482,
"eval_runtime": 4177.9531,
"eval_samples_per_second": 24.698,
"eval_steps_per_second": 3.087,
"step": 21597
},
{
"epoch": 6.8,
"eval_accuracy": 0.8044488106784644,
"eval_loss": 0.8577073812484741,
"eval_runtime": 4179.042,
"eval_samples_per_second": 24.692,
"eval_steps_per_second": 3.087,
"step": 21910
},
{
"epoch": 6.83,
"learning_rate": 3.519668737060042e-05,
"loss": 0.9,
"step": 22000
},
{
"epoch": 6.9,
"eval_accuracy": 0.8051737023740322,
"eval_loss": 0.8543989062309265,
"eval_runtime": 4184.397,
"eval_samples_per_second": 24.66,
"eval_steps_per_second": 3.083,
"step": 22223
},
{
"epoch": 6.99,
"learning_rate": 3.347135955831608e-05,
"loss": 0.8938,
"step": 22500
},
{
"epoch": 7.0,
"eval_accuracy": 0.8057043385627138,
"eval_loss": 0.8503552079200745,
"eval_runtime": 4183.5003,
"eval_samples_per_second": 24.665,
"eval_steps_per_second": 3.083,
"step": 22536
},
{
"epoch": 7.1,
"eval_accuracy": 0.8066939882795919,
"eval_loss": 0.8455684781074524,
"eval_runtime": 4184.5066,
"eval_samples_per_second": 24.659,
"eval_steps_per_second": 3.083,
"step": 22849
},
{
"epoch": 7.14,
"learning_rate": 3.1746031746031745e-05,
"loss": 0.8863,
"step": 23000
},
{
"epoch": 7.19,
"eval_accuracy": 0.8068218962399752,
"eval_loss": 0.8439931273460388,
"eval_runtime": 4178.7482,
"eval_samples_per_second": 24.693,
"eval_steps_per_second": 3.087,
"step": 23162
},
{
"epoch": 7.29,
"eval_accuracy": 0.8075830543867027,
"eval_loss": 0.8405274152755737,
"eval_runtime": 4175.082,
"eval_samples_per_second": 24.715,
"eval_steps_per_second": 3.09,
"step": 23475
},
{
"epoch": 7.3,
"learning_rate": 3.0020703933747414e-05,
"loss": 0.8804,
"step": 23500
},
{
"epoch": 7.39,
"eval_accuracy": 0.8083236702044754,
"eval_loss": 0.8358407616615295,
"eval_runtime": 4173.8003,
"eval_samples_per_second": 24.723,
"eval_steps_per_second": 3.09,
"step": 23788
},
{
"epoch": 7.45,
"learning_rate": 2.829537612146308e-05,
"loss": 0.8743,
"step": 24000
},
{
"epoch": 7.48,
"eval_accuracy": 0.8087474534271941,
"eval_loss": 0.8344011306762695,
"eval_runtime": 4174.341,
"eval_samples_per_second": 24.719,
"eval_steps_per_second": 3.09,
"step": 24101
},
{
"epoch": 7.58,
"eval_accuracy": 0.8092334825727566,
"eval_loss": 0.8311472535133362,
"eval_runtime": 4174.6403,
"eval_samples_per_second": 24.718,
"eval_steps_per_second": 3.09,
"step": 24414
},
{
"epoch": 7.61,
"learning_rate": 2.6570048309178748e-05,
"loss": 0.8706,
"step": 24500
},
{
"epoch": 7.68,
"eval_accuracy": 0.8097197803323306,
"eval_loss": 0.8285520672798157,
"eval_runtime": 4174.1958,
"eval_samples_per_second": 24.72,
"eval_steps_per_second": 3.09,
"step": 24727
},
{
"epoch": 7.76,
"learning_rate": 2.484472049689441e-05,
"loss": 0.8645,
"step": 25000
},
{
"epoch": 7.78,
"eval_accuracy": 0.810216617587347,
"eval_loss": 0.8253086805343628,
"eval_runtime": 4174.8315,
"eval_samples_per_second": 24.716,
"eval_steps_per_second": 3.09,
"step": 25040
},
{
"epoch": 7.87,
"eval_accuracy": 0.8111007973238641,
"eval_loss": 0.8211511373519897,
"eval_runtime": 4173.759,
"eval_samples_per_second": 24.723,
"eval_steps_per_second": 3.09,
"step": 25353
},
{
"epoch": 7.92,
"learning_rate": 2.311939268461008e-05,
"loss": 0.8602,
"step": 25500
},
{
"epoch": 7.97,
"eval_accuracy": 0.8111680475751303,
"eval_loss": 0.8205570578575134,
"eval_runtime": 4174.1596,
"eval_samples_per_second": 24.72,
"eval_steps_per_second": 3.09,
"step": 25666
},
{
"epoch": 8.07,
"eval_accuracy": 0.8120529685373788,
"eval_loss": 0.8158560991287231,
"eval_runtime": 4175.0585,
"eval_samples_per_second": 24.715,
"eval_steps_per_second": 3.09,
"step": 25979
},
{
"epoch": 8.07,
"learning_rate": 2.139406487232574e-05,
"loss": 0.8538,
"step": 26000
},
{
"epoch": 8.16,
"eval_accuracy": 0.8122099938117417,
"eval_loss": 0.814548134803772,
"eval_runtime": 4175.6451,
"eval_samples_per_second": 24.712,
"eval_steps_per_second": 3.089,
"step": 26292
},
{
"epoch": 8.23,
"learning_rate": 1.966873706004141e-05,
"loss": 0.8482,
"step": 26500
},
{
"epoch": 8.26,
"eval_accuracy": 0.8130589470835723,
"eval_loss": 0.8115074634552002,
"eval_runtime": 4173.7083,
"eval_samples_per_second": 24.723,
"eval_steps_per_second": 3.091,
"step": 26605
},
{
"epoch": 8.36,
"eval_accuracy": 0.8134356257072347,
"eval_loss": 0.8089998364448547,
"eval_runtime": 4174.6745,
"eval_samples_per_second": 24.717,
"eval_steps_per_second": 3.09,
"step": 26918
},
{
"epoch": 8.38,
"learning_rate": 1.7943409247757076e-05,
"loss": 0.8488,
"step": 27000
},
{
"epoch": 8.46,
"eval_accuracy": 0.8134433401076823,
"eval_loss": 0.8088431358337402,
"eval_runtime": 4174.8389,
"eval_samples_per_second": 24.716,
"eval_steps_per_second": 3.09,
"step": 27231
},
{
"epoch": 8.54,
"learning_rate": 1.621808143547274e-05,
"loss": 0.8423,
"step": 27500
},
{
"epoch": 8.55,
"eval_accuracy": 0.8137619275792424,
"eval_loss": 0.8057170510292053,
"eval_runtime": 4176.6965,
"eval_samples_per_second": 24.705,
"eval_steps_per_second": 3.088,
"step": 27544
},
{
"epoch": 8.65,
"eval_accuracy": 0.8144356879652452,
"eval_loss": 0.8039630651473999,
"eval_runtime": 4175.7093,
"eval_samples_per_second": 24.711,
"eval_steps_per_second": 3.089,
"step": 27857
},
{
"epoch": 8.69,
"learning_rate": 1.4492753623188407e-05,
"loss": 0.8396,
"step": 28000
},
{
"epoch": 8.75,
"eval_accuracy": 0.8145382455874637,
"eval_loss": 0.8026472926139832,
"eval_runtime": 4180.1354,
"eval_samples_per_second": 24.685,
"eval_steps_per_second": 3.086,
"step": 28170
},
{
"epoch": 8.84,
"eval_accuracy": 0.8152832629079341,
"eval_loss": 0.7988529205322266,
"eval_runtime": 4179.9334,
"eval_samples_per_second": 24.686,
"eval_steps_per_second": 3.086,
"step": 28483
},
{
"epoch": 8.85,
"learning_rate": 1.276742581090407e-05,
"loss": 0.8377,
"step": 28500
},
{
"epoch": 8.94,
"eval_accuracy": 0.8155252212471398,
"eval_loss": 0.7979427576065063,
"eval_runtime": 4178.4294,
"eval_samples_per_second": 24.695,
"eval_steps_per_second": 3.087,
"step": 28796
},
{
"epoch": 9.01,
"learning_rate": 1.1042097998619738e-05,
"loss": 0.8319,
"step": 29000
},
{
"epoch": 9.04,
"eval_accuracy": 0.8159056442908116,
"eval_loss": 0.7969533801078796,
"eval_runtime": 4174.229,
"eval_samples_per_second": 24.72,
"eval_steps_per_second": 3.09,
"step": 29109
},
{
"epoch": 9.14,
"eval_accuracy": 0.816157387607754,
"eval_loss": 0.7945725917816162,
"eval_runtime": 4172.4325,
"eval_samples_per_second": 24.731,
"eval_steps_per_second": 3.091,
"step": 29422
},
{
"epoch": 9.16,
"learning_rate": 9.316770186335403e-06,
"loss": 0.8262,
"step": 29500
},
{
"epoch": 9.23,
"eval_accuracy": 0.8161445368180659,
"eval_loss": 0.7940195202827454,
"eval_runtime": 4171.791,
"eval_samples_per_second": 24.734,
"eval_steps_per_second": 3.092,
"step": 29735
},
{
"epoch": 9.32,
"learning_rate": 7.591442374051071e-06,
"loss": 0.8255,
"step": 30000
},
{
"epoch": 9.33,
"eval_accuracy": 0.8167732822776922,
"eval_loss": 0.7918646931648254,
"eval_runtime": 4178.9618,
"eval_samples_per_second": 24.692,
"eval_steps_per_second": 3.087,
"step": 30048
},
{
"epoch": 9.43,
"eval_accuracy": 0.8168878384273093,
"eval_loss": 0.7914555072784424,
"eval_runtime": 4176.3937,
"eval_samples_per_second": 24.707,
"eval_steps_per_second": 3.089,
"step": 30361
},
{
"epoch": 9.47,
"learning_rate": 5.866114561766736e-06,
"loss": 0.824,
"step": 30500
},
{
"epoch": 9.53,
"eval_accuracy": 0.817306118390898,
"eval_loss": 0.7896197438240051,
"eval_runtime": 4176.9927,
"eval_samples_per_second": 24.704,
"eval_steps_per_second": 3.088,
"step": 30674
},
{
"epoch": 9.62,
"eval_accuracy": 0.8175610442353789,
"eval_loss": 0.7872260212898254,
"eval_runtime": 4173.6592,
"eval_samples_per_second": 24.723,
"eval_steps_per_second": 3.091,
"step": 30987
},
{
"epoch": 9.63,
"learning_rate": 4.140786749482402e-06,
"loss": 0.8218,
"step": 31000
},
{
"epoch": 9.72,
"eval_accuracy": 0.8176242146063594,
"eval_loss": 0.7876725792884827,
"eval_runtime": 4174.4186,
"eval_samples_per_second": 24.719,
"eval_steps_per_second": 3.09,
"step": 31300
},
{
"epoch": 9.78,
"learning_rate": 2.4154589371980677e-06,
"loss": 0.8204,
"step": 31500
},
{
"epoch": 9.82,
"eval_accuracy": 0.8176433057924909,
"eval_loss": 0.7873775959014893,
"eval_runtime": 4173.4522,
"eval_samples_per_second": 24.725,
"eval_steps_per_second": 3.091,
"step": 31613
},
{
"epoch": 9.91,
"eval_accuracy": 0.8180999846708549,
"eval_loss": 0.7846992611885071,
"eval_runtime": 4174.7361,
"eval_samples_per_second": 24.717,
"eval_steps_per_second": 3.09,
"step": 31926
},
{
"epoch": 9.94,
"learning_rate": 6.901311249137336e-07,
"loss": 0.8177,
"step": 32000
},
{
"epoch": 10.0,
"step": 32200,
"total_flos": 7.683327485823698e+18,
"train_loss": 1.915723304985473,
"train_runtime": 1372706.2896,
"train_samples_per_second": 6.006,
"train_steps_per_second": 0.023
}
],
"max_steps": 32200,
"num_train_epochs": 10,
"total_flos": 7.683327485823698e+18,
"trial_name": null,
"trial_params": null
}