mistral-zoil-chat / trainer_state.json
jazza234234's picture
Upload 3 files
494b8c7
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.5978367748279254,
"eval_steps": 35,
"global_step": 3250,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 2.4860678577282414e-05,
"loss": 2.6111,
"step": 35
},
{
"epoch": 0.02,
"eval_loss": 2.0262463092803955,
"eval_runtime": 11.4452,
"eval_samples_per_second": 1.835,
"eval_steps_per_second": 0.262,
"step": 35
},
{
"epoch": 0.03,
"learning_rate": 2.471725946566137e-05,
"loss": 2.2099,
"step": 70
},
{
"epoch": 0.03,
"eval_loss": 1.8728946447372437,
"eval_runtime": 11.4415,
"eval_samples_per_second": 1.835,
"eval_steps_per_second": 0.262,
"step": 70
},
{
"epoch": 0.05,
"learning_rate": 2.4573840354040322e-05,
"loss": 2.1297,
"step": 105
},
{
"epoch": 0.05,
"eval_loss": 1.8009727001190186,
"eval_runtime": 11.4312,
"eval_samples_per_second": 1.837,
"eval_steps_per_second": 0.262,
"step": 105
},
{
"epoch": 0.07,
"learning_rate": 2.4430421242419278e-05,
"loss": 2.0376,
"step": 140
},
{
"epoch": 0.07,
"eval_loss": 1.7509633302688599,
"eval_runtime": 11.4441,
"eval_samples_per_second": 1.835,
"eval_steps_per_second": 0.262,
"step": 140
},
{
"epoch": 0.09,
"learning_rate": 2.428700213079823e-05,
"loss": 2.0077,
"step": 175
},
{
"epoch": 0.09,
"eval_loss": 1.7361119985580444,
"eval_runtime": 11.4061,
"eval_samples_per_second": 1.841,
"eval_steps_per_second": 0.263,
"step": 175
},
{
"epoch": 0.1,
"learning_rate": 2.4143583019177186e-05,
"loss": 1.9427,
"step": 210
},
{
"epoch": 0.1,
"eval_loss": 1.7030919790267944,
"eval_runtime": 11.3698,
"eval_samples_per_second": 1.847,
"eval_steps_per_second": 0.264,
"step": 210
},
{
"epoch": 0.12,
"learning_rate": 2.4000163907556138e-05,
"loss": 1.9224,
"step": 245
},
{
"epoch": 0.12,
"eval_loss": 1.6869089603424072,
"eval_runtime": 11.4842,
"eval_samples_per_second": 1.829,
"eval_steps_per_second": 0.261,
"step": 245
},
{
"epoch": 0.14,
"learning_rate": 2.3856744795935094e-05,
"loss": 1.9088,
"step": 280
},
{
"epoch": 0.14,
"eval_loss": 1.6638721227645874,
"eval_runtime": 11.4694,
"eval_samples_per_second": 1.831,
"eval_steps_per_second": 0.262,
"step": 280
},
{
"epoch": 0.15,
"learning_rate": 2.3713325684314046e-05,
"loss": 1.9012,
"step": 315
},
{
"epoch": 0.15,
"eval_loss": 1.6475718021392822,
"eval_runtime": 11.4639,
"eval_samples_per_second": 1.832,
"eval_steps_per_second": 0.262,
"step": 315
},
{
"epoch": 0.17,
"learning_rate": 2.3569906572693e-05,
"loss": 1.8873,
"step": 350
},
{
"epoch": 0.17,
"eval_loss": 1.6309669017791748,
"eval_runtime": 11.4991,
"eval_samples_per_second": 1.826,
"eval_steps_per_second": 0.261,
"step": 350
},
{
"epoch": 0.19,
"learning_rate": 2.3426487461071954e-05,
"loss": 1.881,
"step": 385
},
{
"epoch": 0.19,
"eval_loss": 1.6221837997436523,
"eval_runtime": 11.509,
"eval_samples_per_second": 1.825,
"eval_steps_per_second": 0.261,
"step": 385
},
{
"epoch": 0.21,
"learning_rate": 2.328306834945091e-05,
"loss": 1.8931,
"step": 420
},
{
"epoch": 0.21,
"eval_loss": 1.6102544069290161,
"eval_runtime": 11.52,
"eval_samples_per_second": 1.823,
"eval_steps_per_second": 0.26,
"step": 420
},
{
"epoch": 0.22,
"learning_rate": 2.3139649237829865e-05,
"loss": 1.8524,
"step": 455
},
{
"epoch": 0.22,
"eval_loss": 1.607195496559143,
"eval_runtime": 11.4864,
"eval_samples_per_second": 1.828,
"eval_steps_per_second": 0.261,
"step": 455
},
{
"epoch": 0.24,
"learning_rate": 2.299623012620882e-05,
"loss": 1.8498,
"step": 490
},
{
"epoch": 0.24,
"eval_loss": 1.5910367965698242,
"eval_runtime": 11.4927,
"eval_samples_per_second": 1.827,
"eval_steps_per_second": 0.261,
"step": 490
},
{
"epoch": 0.26,
"learning_rate": 2.2852811014587773e-05,
"loss": 1.8194,
"step": 525
},
{
"epoch": 0.26,
"eval_loss": 1.5871880054473877,
"eval_runtime": 11.4668,
"eval_samples_per_second": 1.831,
"eval_steps_per_second": 0.262,
"step": 525
},
{
"epoch": 0.28,
"learning_rate": 2.270939190296673e-05,
"loss": 1.8619,
"step": 560
},
{
"epoch": 0.28,
"eval_loss": 1.577917218208313,
"eval_runtime": 11.4911,
"eval_samples_per_second": 1.828,
"eval_steps_per_second": 0.261,
"step": 560
},
{
"epoch": 0.29,
"learning_rate": 2.256597279134568e-05,
"loss": 1.8458,
"step": 595
},
{
"epoch": 0.29,
"eval_loss": 1.569788932800293,
"eval_runtime": 11.4523,
"eval_samples_per_second": 1.834,
"eval_steps_per_second": 0.262,
"step": 595
},
{
"epoch": 0.31,
"learning_rate": 2.2422553679724637e-05,
"loss": 1.7886,
"step": 630
},
{
"epoch": 0.31,
"eval_loss": 1.5579793453216553,
"eval_runtime": 11.4733,
"eval_samples_per_second": 1.83,
"eval_steps_per_second": 0.261,
"step": 630
},
{
"epoch": 0.33,
"learning_rate": 2.227913456810359e-05,
"loss": 1.8302,
"step": 665
},
{
"epoch": 0.33,
"eval_loss": 1.5591094493865967,
"eval_runtime": 11.5195,
"eval_samples_per_second": 1.823,
"eval_steps_per_second": 0.26,
"step": 665
},
{
"epoch": 0.34,
"learning_rate": 2.2135715456482545e-05,
"loss": 1.8173,
"step": 700
},
{
"epoch": 0.34,
"eval_loss": 1.5488135814666748,
"eval_runtime": 11.5232,
"eval_samples_per_second": 1.822,
"eval_steps_per_second": 0.26,
"step": 700
},
{
"epoch": 0.36,
"learning_rate": 2.1992296344861498e-05,
"loss": 1.822,
"step": 735
},
{
"epoch": 0.36,
"eval_loss": 1.5554958581924438,
"eval_runtime": 11.4539,
"eval_samples_per_second": 1.833,
"eval_steps_per_second": 0.262,
"step": 735
},
{
"epoch": 0.38,
"learning_rate": 2.1848877233240453e-05,
"loss": 1.7775,
"step": 770
},
{
"epoch": 0.38,
"eval_loss": 1.5420997142791748,
"eval_runtime": 11.4375,
"eval_samples_per_second": 1.836,
"eval_steps_per_second": 0.262,
"step": 770
},
{
"epoch": 0.4,
"learning_rate": 2.1705458121619406e-05,
"loss": 1.8007,
"step": 805
},
{
"epoch": 0.4,
"eval_loss": 1.530941367149353,
"eval_runtime": 11.5016,
"eval_samples_per_second": 1.826,
"eval_steps_per_second": 0.261,
"step": 805
},
{
"epoch": 0.41,
"learning_rate": 2.156203900999836e-05,
"loss": 1.802,
"step": 840
},
{
"epoch": 0.41,
"eval_loss": 1.5383422374725342,
"eval_runtime": 11.502,
"eval_samples_per_second": 1.826,
"eval_steps_per_second": 0.261,
"step": 840
},
{
"epoch": 0.43,
"learning_rate": 2.1418619898377314e-05,
"loss": 1.7921,
"step": 875
},
{
"epoch": 0.43,
"eval_loss": 1.5289701223373413,
"eval_runtime": 11.4801,
"eval_samples_per_second": 1.829,
"eval_steps_per_second": 0.261,
"step": 875
},
{
"epoch": 0.45,
"learning_rate": 2.127520078675627e-05,
"loss": 1.7741,
"step": 910
},
{
"epoch": 0.45,
"eval_loss": 1.5224323272705078,
"eval_runtime": 11.4514,
"eval_samples_per_second": 1.834,
"eval_steps_per_second": 0.262,
"step": 910
},
{
"epoch": 0.46,
"learning_rate": 2.1131781675135225e-05,
"loss": 1.7642,
"step": 945
},
{
"epoch": 0.46,
"eval_loss": 1.5138221979141235,
"eval_runtime": 11.4557,
"eval_samples_per_second": 1.833,
"eval_steps_per_second": 0.262,
"step": 945
},
{
"epoch": 0.48,
"learning_rate": 2.098836256351418e-05,
"loss": 1.7763,
"step": 980
},
{
"epoch": 0.48,
"eval_loss": 1.5038686990737915,
"eval_runtime": 11.491,
"eval_samples_per_second": 1.828,
"eval_steps_per_second": 0.261,
"step": 980
},
{
"epoch": 0.5,
"learning_rate": 2.0844943451893133e-05,
"loss": 1.744,
"step": 1015
},
{
"epoch": 0.5,
"eval_loss": 1.5031676292419434,
"eval_runtime": 11.541,
"eval_samples_per_second": 1.82,
"eval_steps_per_second": 0.26,
"step": 1015
},
{
"epoch": 0.52,
"learning_rate": 2.070152434027209e-05,
"loss": 1.7968,
"step": 1050
},
{
"epoch": 0.52,
"eval_loss": 1.507965326309204,
"eval_runtime": 11.5281,
"eval_samples_per_second": 1.822,
"eval_steps_per_second": 0.26,
"step": 1050
},
{
"epoch": 0.53,
"learning_rate": 2.055810522865104e-05,
"loss": 1.7192,
"step": 1085
},
{
"epoch": 0.53,
"eval_loss": 1.4997100830078125,
"eval_runtime": 11.4603,
"eval_samples_per_second": 1.832,
"eval_steps_per_second": 0.262,
"step": 1085
},
{
"epoch": 0.55,
"learning_rate": 2.0414686117029997e-05,
"loss": 1.7469,
"step": 1120
},
{
"epoch": 0.55,
"eval_loss": 1.4938936233520508,
"eval_runtime": 11.5073,
"eval_samples_per_second": 1.825,
"eval_steps_per_second": 0.261,
"step": 1120
},
{
"epoch": 0.57,
"learning_rate": 2.027126700540895e-05,
"loss": 1.7566,
"step": 1155
},
{
"epoch": 0.57,
"eval_loss": 1.4912477731704712,
"eval_runtime": 11.5141,
"eval_samples_per_second": 1.824,
"eval_steps_per_second": 0.261,
"step": 1155
},
{
"epoch": 0.59,
"learning_rate": 2.0127847893787905e-05,
"loss": 1.7395,
"step": 1190
},
{
"epoch": 0.59,
"eval_loss": 1.4848188161849976,
"eval_runtime": 11.5052,
"eval_samples_per_second": 1.825,
"eval_steps_per_second": 0.261,
"step": 1190
},
{
"epoch": 0.6,
"learning_rate": 1.9984428782166857e-05,
"loss": 1.7555,
"step": 1225
},
{
"epoch": 0.6,
"eval_loss": 1.4838807582855225,
"eval_runtime": 11.4789,
"eval_samples_per_second": 1.829,
"eval_steps_per_second": 0.261,
"step": 1225
},
{
"epoch": 0.62,
"learning_rate": 1.9841009670545813e-05,
"loss": 1.7447,
"step": 1260
},
{
"epoch": 0.62,
"eval_loss": 1.4835803508758545,
"eval_runtime": 11.5109,
"eval_samples_per_second": 1.824,
"eval_steps_per_second": 0.261,
"step": 1260
},
{
"epoch": 0.64,
"learning_rate": 1.9697590558924765e-05,
"loss": 1.7375,
"step": 1295
},
{
"epoch": 0.64,
"eval_loss": 1.483397126197815,
"eval_runtime": 11.4887,
"eval_samples_per_second": 1.828,
"eval_steps_per_second": 0.261,
"step": 1295
},
{
"epoch": 0.65,
"learning_rate": 1.955417144730372e-05,
"loss": 1.7565,
"step": 1330
},
{
"epoch": 0.65,
"eval_loss": 1.480878233909607,
"eval_runtime": 11.4732,
"eval_samples_per_second": 1.83,
"eval_steps_per_second": 0.261,
"step": 1330
},
{
"epoch": 0.67,
"learning_rate": 1.9410752335682677e-05,
"loss": 1.6992,
"step": 1365
},
{
"epoch": 0.67,
"eval_loss": 1.471764087677002,
"eval_runtime": 11.4976,
"eval_samples_per_second": 1.826,
"eval_steps_per_second": 0.261,
"step": 1365
},
{
"epoch": 0.69,
"learning_rate": 1.926733322406163e-05,
"loss": 1.7588,
"step": 1400
},
{
"epoch": 0.69,
"eval_loss": 1.4718772172927856,
"eval_runtime": 11.4494,
"eval_samples_per_second": 1.834,
"eval_steps_per_second": 0.262,
"step": 1400
},
{
"epoch": 0.71,
"learning_rate": 1.9123914112440585e-05,
"loss": 1.7459,
"step": 1435
},
{
"epoch": 0.71,
"eval_loss": 1.4739229679107666,
"eval_runtime": 11.4772,
"eval_samples_per_second": 1.83,
"eval_steps_per_second": 0.261,
"step": 1435
},
{
"epoch": 0.72,
"learning_rate": 1.898049500081954e-05,
"loss": 1.7496,
"step": 1470
},
{
"epoch": 0.72,
"eval_loss": 1.4594241380691528,
"eval_runtime": 11.4774,
"eval_samples_per_second": 1.83,
"eval_steps_per_second": 0.261,
"step": 1470
},
{
"epoch": 0.74,
"learning_rate": 1.8837075889198493e-05,
"loss": 1.7217,
"step": 1505
},
{
"epoch": 0.74,
"eval_loss": 1.4519307613372803,
"eval_runtime": 11.4991,
"eval_samples_per_second": 1.826,
"eval_steps_per_second": 0.261,
"step": 1505
},
{
"epoch": 0.76,
"learning_rate": 1.8693656777577448e-05,
"loss": 1.7379,
"step": 1540
},
{
"epoch": 0.76,
"eval_loss": 1.450444221496582,
"eval_runtime": 11.4875,
"eval_samples_per_second": 1.828,
"eval_steps_per_second": 0.261,
"step": 1540
},
{
"epoch": 0.77,
"learning_rate": 1.85502376659564e-05,
"loss": 1.7178,
"step": 1575
},
{
"epoch": 0.77,
"eval_loss": 1.4498975276947021,
"eval_runtime": 11.4834,
"eval_samples_per_second": 1.829,
"eval_steps_per_second": 0.261,
"step": 1575
},
{
"epoch": 0.79,
"learning_rate": 1.8406818554335356e-05,
"loss": 1.726,
"step": 1610
},
{
"epoch": 0.79,
"eval_loss": 1.4456058740615845,
"eval_runtime": 11.4896,
"eval_samples_per_second": 1.828,
"eval_steps_per_second": 0.261,
"step": 1610
},
{
"epoch": 0.81,
"learning_rate": 1.826339944271431e-05,
"loss": 1.6916,
"step": 1645
},
{
"epoch": 0.81,
"eval_loss": 1.4492512941360474,
"eval_runtime": 11.4853,
"eval_samples_per_second": 1.828,
"eval_steps_per_second": 0.261,
"step": 1645
},
{
"epoch": 0.83,
"learning_rate": 1.8119980331093264e-05,
"loss": 1.7388,
"step": 1680
},
{
"epoch": 0.83,
"eval_loss": 1.4481810331344604,
"eval_runtime": 11.5015,
"eval_samples_per_second": 1.826,
"eval_steps_per_second": 0.261,
"step": 1680
},
{
"epoch": 0.84,
"learning_rate": 1.7976561219472217e-05,
"loss": 1.7026,
"step": 1715
},
{
"epoch": 0.84,
"eval_loss": 1.4463753700256348,
"eval_runtime": 11.4964,
"eval_samples_per_second": 1.827,
"eval_steps_per_second": 0.261,
"step": 1715
},
{
"epoch": 0.86,
"learning_rate": 1.7833142107851172e-05,
"loss": 1.7025,
"step": 1750
},
{
"epoch": 0.86,
"eval_loss": 1.4447450637817383,
"eval_runtime": 11.5592,
"eval_samples_per_second": 1.817,
"eval_steps_per_second": 0.26,
"step": 1750
},
{
"epoch": 0.88,
"learning_rate": 1.7689722996230128e-05,
"loss": 1.7497,
"step": 1785
},
{
"epoch": 0.88,
"eval_loss": 1.442565679550171,
"eval_runtime": 11.4749,
"eval_samples_per_second": 1.83,
"eval_steps_per_second": 0.261,
"step": 1785
},
{
"epoch": 0.89,
"learning_rate": 1.754630388460908e-05,
"loss": 1.7545,
"step": 1820
},
{
"epoch": 0.89,
"eval_loss": 1.4413307905197144,
"eval_runtime": 11.4848,
"eval_samples_per_second": 1.829,
"eval_steps_per_second": 0.261,
"step": 1820
},
{
"epoch": 0.91,
"learning_rate": 1.7402884772988036e-05,
"loss": 1.6951,
"step": 1855
},
{
"epoch": 0.91,
"eval_loss": 1.440474033355713,
"eval_runtime": 11.5878,
"eval_samples_per_second": 1.812,
"eval_steps_per_second": 0.259,
"step": 1855
},
{
"epoch": 0.93,
"learning_rate": 1.725946566136699e-05,
"loss": 1.7432,
"step": 1890
},
{
"epoch": 0.93,
"eval_loss": 1.4366823434829712,
"eval_runtime": 11.5531,
"eval_samples_per_second": 1.818,
"eval_steps_per_second": 0.26,
"step": 1890
},
{
"epoch": 0.95,
"learning_rate": 1.7116046549745944e-05,
"loss": 1.7153,
"step": 1925
},
{
"epoch": 0.95,
"eval_loss": 1.4368674755096436,
"eval_runtime": 11.4927,
"eval_samples_per_second": 1.827,
"eval_steps_per_second": 0.261,
"step": 1925
},
{
"epoch": 0.96,
"learning_rate": 1.69726274381249e-05,
"loss": 1.732,
"step": 1960
},
{
"epoch": 0.96,
"eval_loss": 1.4359997510910034,
"eval_runtime": 11.4761,
"eval_samples_per_second": 1.83,
"eval_steps_per_second": 0.261,
"step": 1960
},
{
"epoch": 0.98,
"learning_rate": 1.6829208326503852e-05,
"loss": 1.7143,
"step": 1995
},
{
"epoch": 0.98,
"eval_loss": 1.437907338142395,
"eval_runtime": 11.5143,
"eval_samples_per_second": 1.824,
"eval_steps_per_second": 0.261,
"step": 1995
},
{
"epoch": 1.0,
"learning_rate": 1.6685789214882808e-05,
"loss": 1.7072,
"step": 2030
},
{
"epoch": 1.0,
"eval_loss": 1.4374998807907104,
"eval_runtime": 11.4633,
"eval_samples_per_second": 1.832,
"eval_steps_per_second": 0.262,
"step": 2030
},
{
"epoch": 1.02,
"learning_rate": 1.654237010326176e-05,
"loss": 1.591,
"step": 2065
},
{
"epoch": 1.02,
"eval_loss": 1.4363301992416382,
"eval_runtime": 11.4952,
"eval_samples_per_second": 1.827,
"eval_steps_per_second": 0.261,
"step": 2065
},
{
"epoch": 1.03,
"learning_rate": 1.6398950991640716e-05,
"loss": 1.549,
"step": 2100
},
{
"epoch": 1.03,
"eval_loss": 1.4447616338729858,
"eval_runtime": 11.4847,
"eval_samples_per_second": 1.829,
"eval_steps_per_second": 0.261,
"step": 2100
},
{
"epoch": 1.05,
"learning_rate": 1.6255531880019668e-05,
"loss": 1.5636,
"step": 2135
},
{
"epoch": 1.05,
"eval_loss": 1.4376814365386963,
"eval_runtime": 11.5477,
"eval_samples_per_second": 1.819,
"eval_steps_per_second": 0.26,
"step": 2135
},
{
"epoch": 1.07,
"learning_rate": 1.6112112768398624e-05,
"loss": 1.5629,
"step": 2170
},
{
"epoch": 1.07,
"eval_loss": 1.4429987668991089,
"eval_runtime": 11.5258,
"eval_samples_per_second": 1.822,
"eval_steps_per_second": 0.26,
"step": 2170
},
{
"epoch": 1.08,
"learning_rate": 1.596869365677758e-05,
"loss": 1.5539,
"step": 2205
},
{
"epoch": 1.08,
"eval_loss": 1.4357415437698364,
"eval_runtime": 11.5216,
"eval_samples_per_second": 1.823,
"eval_steps_per_second": 0.26,
"step": 2205
},
{
"epoch": 1.1,
"learning_rate": 1.5825274545156532e-05,
"loss": 1.574,
"step": 2240
},
{
"epoch": 1.1,
"eval_loss": 1.4316208362579346,
"eval_runtime": 11.5068,
"eval_samples_per_second": 1.825,
"eval_steps_per_second": 0.261,
"step": 2240
},
{
"epoch": 1.12,
"learning_rate": 1.5681855433535488e-05,
"loss": 1.5976,
"step": 2275
},
{
"epoch": 1.12,
"eval_loss": 1.4350919723510742,
"eval_runtime": 11.554,
"eval_samples_per_second": 1.818,
"eval_steps_per_second": 0.26,
"step": 2275
},
{
"epoch": 1.14,
"learning_rate": 1.553843632191444e-05,
"loss": 1.6087,
"step": 2310
},
{
"epoch": 1.14,
"eval_loss": 1.4374239444732666,
"eval_runtime": 11.5273,
"eval_samples_per_second": 1.822,
"eval_steps_per_second": 0.26,
"step": 2310
},
{
"epoch": 1.15,
"learning_rate": 1.5395017210293396e-05,
"loss": 1.5684,
"step": 2345
},
{
"epoch": 1.15,
"eval_loss": 1.4325898885726929,
"eval_runtime": 11.558,
"eval_samples_per_second": 1.817,
"eval_steps_per_second": 0.26,
"step": 2345
},
{
"epoch": 1.17,
"learning_rate": 1.5251598098672348e-05,
"loss": 1.5858,
"step": 2380
},
{
"epoch": 1.17,
"eval_loss": 1.437401294708252,
"eval_runtime": 11.542,
"eval_samples_per_second": 1.819,
"eval_steps_per_second": 0.26,
"step": 2380
},
{
"epoch": 1.19,
"learning_rate": 1.5108178987051302e-05,
"loss": 1.5768,
"step": 2415
},
{
"epoch": 1.19,
"eval_loss": 1.437371015548706,
"eval_runtime": 11.5127,
"eval_samples_per_second": 1.824,
"eval_steps_per_second": 0.261,
"step": 2415
},
{
"epoch": 1.2,
"learning_rate": 1.496475987543026e-05,
"loss": 1.5719,
"step": 2450
},
{
"epoch": 1.2,
"eval_loss": 1.4342734813690186,
"eval_runtime": 11.5878,
"eval_samples_per_second": 1.812,
"eval_steps_per_second": 0.259,
"step": 2450
},
{
"epoch": 1.22,
"learning_rate": 1.4821340763809213e-05,
"loss": 1.5661,
"step": 2485
},
{
"epoch": 1.22,
"eval_loss": 1.4325451850891113,
"eval_runtime": 11.5679,
"eval_samples_per_second": 1.815,
"eval_steps_per_second": 0.259,
"step": 2485
},
{
"epoch": 1.24,
"learning_rate": 1.4677921652188167e-05,
"loss": 1.571,
"step": 2520
},
{
"epoch": 1.24,
"eval_loss": 1.4307782649993896,
"eval_runtime": 11.5174,
"eval_samples_per_second": 1.823,
"eval_steps_per_second": 0.26,
"step": 2520
},
{
"epoch": 1.26,
"learning_rate": 1.4534502540567121e-05,
"loss": 1.54,
"step": 2555
},
{
"epoch": 1.26,
"eval_loss": 1.4300092458724976,
"eval_runtime": 11.5587,
"eval_samples_per_second": 1.817,
"eval_steps_per_second": 0.26,
"step": 2555
},
{
"epoch": 1.27,
"learning_rate": 1.4391083428946075e-05,
"loss": 1.5275,
"step": 2590
},
{
"epoch": 1.27,
"eval_loss": 1.4285993576049805,
"eval_runtime": 11.5338,
"eval_samples_per_second": 1.821,
"eval_steps_per_second": 0.26,
"step": 2590
},
{
"epoch": 1.29,
"learning_rate": 1.424766431732503e-05,
"loss": 1.5837,
"step": 2625
},
{
"epoch": 1.29,
"eval_loss": 1.4346880912780762,
"eval_runtime": 11.5585,
"eval_samples_per_second": 1.817,
"eval_steps_per_second": 0.26,
"step": 2625
},
{
"epoch": 1.31,
"learning_rate": 1.4104245205703983e-05,
"loss": 1.571,
"step": 2660
},
{
"epoch": 1.31,
"eval_loss": 1.4264836311340332,
"eval_runtime": 11.5547,
"eval_samples_per_second": 1.817,
"eval_steps_per_second": 0.26,
"step": 2660
},
{
"epoch": 1.32,
"learning_rate": 1.3960826094082937e-05,
"loss": 1.5538,
"step": 2695
},
{
"epoch": 1.32,
"eval_loss": 1.424988865852356,
"eval_runtime": 11.5078,
"eval_samples_per_second": 1.825,
"eval_steps_per_second": 0.261,
"step": 2695
},
{
"epoch": 1.34,
"learning_rate": 1.3817406982461891e-05,
"loss": 1.5634,
"step": 2730
},
{
"epoch": 1.34,
"eval_loss": 1.4214783906936646,
"eval_runtime": 11.5523,
"eval_samples_per_second": 1.818,
"eval_steps_per_second": 0.26,
"step": 2730
},
{
"epoch": 1.36,
"learning_rate": 1.3673987870840845e-05,
"loss": 1.5762,
"step": 2765
},
{
"epoch": 1.36,
"eval_loss": 1.417558193206787,
"eval_runtime": 11.5881,
"eval_samples_per_second": 1.812,
"eval_steps_per_second": 0.259,
"step": 2765
},
{
"epoch": 1.38,
"learning_rate": 1.35305687592198e-05,
"loss": 1.5574,
"step": 2800
},
{
"epoch": 1.38,
"eval_loss": 1.4196423292160034,
"eval_runtime": 11.5684,
"eval_samples_per_second": 1.815,
"eval_steps_per_second": 0.259,
"step": 2800
},
{
"epoch": 1.39,
"learning_rate": 1.3387149647598754e-05,
"loss": 1.5783,
"step": 2835
},
{
"epoch": 1.39,
"eval_loss": 1.42311429977417,
"eval_runtime": 11.5665,
"eval_samples_per_second": 1.816,
"eval_steps_per_second": 0.259,
"step": 2835
},
{
"epoch": 1.41,
"learning_rate": 1.3243730535977708e-05,
"loss": 1.576,
"step": 2870
},
{
"epoch": 1.41,
"eval_loss": 1.4219133853912354,
"eval_runtime": 11.5466,
"eval_samples_per_second": 1.819,
"eval_steps_per_second": 0.26,
"step": 2870
},
{
"epoch": 1.43,
"learning_rate": 1.3100311424356662e-05,
"loss": 1.6013,
"step": 2905
},
{
"epoch": 1.43,
"eval_loss": 1.4164221286773682,
"eval_runtime": 11.5232,
"eval_samples_per_second": 1.822,
"eval_steps_per_second": 0.26,
"step": 2905
},
{
"epoch": 1.45,
"learning_rate": 1.2956892312735619e-05,
"loss": 1.5824,
"step": 2940
},
{
"epoch": 1.45,
"eval_loss": 1.4148584604263306,
"eval_runtime": 11.494,
"eval_samples_per_second": 1.827,
"eval_steps_per_second": 0.261,
"step": 2940
},
{
"epoch": 1.46,
"learning_rate": 1.2813473201114573e-05,
"loss": 1.5954,
"step": 2975
},
{
"epoch": 1.46,
"eval_loss": 1.4142621755599976,
"eval_runtime": 11.564,
"eval_samples_per_second": 1.816,
"eval_steps_per_second": 0.259,
"step": 2975
},
{
"epoch": 1.48,
"learning_rate": 1.2670054089493527e-05,
"loss": 1.5621,
"step": 3010
},
{
"epoch": 1.48,
"eval_loss": 1.4165393114089966,
"eval_runtime": 11.5455,
"eval_samples_per_second": 1.819,
"eval_steps_per_second": 0.26,
"step": 3010
},
{
"epoch": 1.5,
"learning_rate": 1.2526634977872481e-05,
"loss": 1.5877,
"step": 3045
},
{
"epoch": 1.5,
"eval_loss": 1.41389000415802,
"eval_runtime": 11.5632,
"eval_samples_per_second": 1.816,
"eval_steps_per_second": 0.259,
"step": 3045
},
{
"epoch": 1.51,
"learning_rate": 1.2383215866251435e-05,
"loss": 1.5843,
"step": 3080
},
{
"epoch": 1.51,
"eval_loss": 1.409727931022644,
"eval_runtime": 11.5302,
"eval_samples_per_second": 1.821,
"eval_steps_per_second": 0.26,
"step": 3080
},
{
"epoch": 1.53,
"learning_rate": 1.2239796754630389e-05,
"loss": 1.5255,
"step": 3115
},
{
"epoch": 1.53,
"eval_loss": 1.419966697692871,
"eval_runtime": 11.5457,
"eval_samples_per_second": 1.819,
"eval_steps_per_second": 0.26,
"step": 3115
},
{
"epoch": 1.55,
"learning_rate": 1.2096377643009343e-05,
"loss": 1.55,
"step": 3150
},
{
"epoch": 1.55,
"eval_loss": 1.4153906106948853,
"eval_runtime": 11.5656,
"eval_samples_per_second": 1.816,
"eval_steps_per_second": 0.259,
"step": 3150
},
{
"epoch": 1.57,
"learning_rate": 1.1952958531388297e-05,
"loss": 1.5519,
"step": 3185
},
{
"epoch": 1.57,
"eval_loss": 1.4161474704742432,
"eval_runtime": 11.5425,
"eval_samples_per_second": 1.819,
"eval_steps_per_second": 0.26,
"step": 3185
},
{
"epoch": 1.58,
"learning_rate": 1.1809539419767251e-05,
"loss": 1.5273,
"step": 3220
},
{
"epoch": 1.58,
"eval_loss": 1.416408658027649,
"eval_runtime": 11.5376,
"eval_samples_per_second": 1.82,
"eval_steps_per_second": 0.26,
"step": 3220
}
],
"logging_steps": 35,
"max_steps": 6102,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 250,
"total_flos": 1.093574423789568e+18,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}