captioner-llava-10w / trainer_state.json
Observer
commit from jcy
24e3f90
raw
history blame
81.3 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9986976744186047,
"eval_steps": 500,
"global_step": 671,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 9.523809523809525e-07,
"loss": 1.9835,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 1.904761904761905e-06,
"loss": 1.9424,
"step": 2
},
{
"epoch": 0.0,
"learning_rate": 2.8571428571428573e-06,
"loss": 1.9622,
"step": 3
},
{
"epoch": 0.01,
"learning_rate": 3.80952380952381e-06,
"loss": 1.8711,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 4.761904761904762e-06,
"loss": 1.856,
"step": 5
},
{
"epoch": 0.01,
"learning_rate": 5.7142857142857145e-06,
"loss": 1.7429,
"step": 6
},
{
"epoch": 0.01,
"learning_rate": 6.666666666666667e-06,
"loss": 1.7233,
"step": 7
},
{
"epoch": 0.01,
"learning_rate": 7.61904761904762e-06,
"loss": 1.8379,
"step": 8
},
{
"epoch": 0.01,
"learning_rate": 8.571428571428571e-06,
"loss": 1.7302,
"step": 9
},
{
"epoch": 0.01,
"learning_rate": 9.523809523809525e-06,
"loss": 1.7233,
"step": 10
},
{
"epoch": 0.02,
"learning_rate": 1.0476190476190477e-05,
"loss": 1.7449,
"step": 11
},
{
"epoch": 0.02,
"learning_rate": 1.1428571428571429e-05,
"loss": 1.6604,
"step": 12
},
{
"epoch": 0.02,
"learning_rate": 1.2380952380952383e-05,
"loss": 1.659,
"step": 13
},
{
"epoch": 0.02,
"learning_rate": 1.3333333333333333e-05,
"loss": 1.5993,
"step": 14
},
{
"epoch": 0.02,
"learning_rate": 1.4285714285714287e-05,
"loss": 1.6294,
"step": 15
},
{
"epoch": 0.02,
"learning_rate": 1.523809523809524e-05,
"loss": 1.6448,
"step": 16
},
{
"epoch": 0.03,
"learning_rate": 1.6190476190476193e-05,
"loss": 1.646,
"step": 17
},
{
"epoch": 0.03,
"learning_rate": 1.7142857142857142e-05,
"loss": 1.6029,
"step": 18
},
{
"epoch": 0.03,
"learning_rate": 1.8095238095238097e-05,
"loss": 1.5918,
"step": 19
},
{
"epoch": 0.03,
"learning_rate": 1.904761904761905e-05,
"loss": 1.5732,
"step": 20
},
{
"epoch": 0.03,
"learning_rate": 2e-05,
"loss": 1.5891,
"step": 21
},
{
"epoch": 0.03,
"learning_rate": 1.9999883200175286e-05,
"loss": 1.5999,
"step": 22
},
{
"epoch": 0.03,
"learning_rate": 1.999953280342959e-05,
"loss": 1.6173,
"step": 23
},
{
"epoch": 0.04,
"learning_rate": 1.9998948817948157e-05,
"loss": 1.5544,
"step": 24
},
{
"epoch": 0.04,
"learning_rate": 1.9998131257372878e-05,
"loss": 1.5308,
"step": 25
},
{
"epoch": 0.04,
"learning_rate": 1.9997080140801932e-05,
"loss": 1.6077,
"step": 26
},
{
"epoch": 0.04,
"learning_rate": 1.9995795492789368e-05,
"loss": 1.5175,
"step": 27
},
{
"epoch": 0.04,
"learning_rate": 1.999427734334452e-05,
"loss": 1.4982,
"step": 28
},
{
"epoch": 0.04,
"learning_rate": 1.9992525727931303e-05,
"loss": 1.4936,
"step": 29
},
{
"epoch": 0.04,
"learning_rate": 1.9990540687467394e-05,
"loss": 1.4676,
"step": 30
},
{
"epoch": 0.05,
"learning_rate": 1.998832226832327e-05,
"loss": 1.5268,
"step": 31
},
{
"epoch": 0.05,
"learning_rate": 1.9985870522321118e-05,
"loss": 1.4486,
"step": 32
},
{
"epoch": 0.05,
"learning_rate": 1.9983185506733643e-05,
"loss": 1.5313,
"step": 33
},
{
"epoch": 0.05,
"learning_rate": 1.9980267284282718e-05,
"loss": 1.4926,
"step": 34
},
{
"epoch": 0.05,
"learning_rate": 1.9977115923137912e-05,
"loss": 1.4406,
"step": 35
},
{
"epoch": 0.05,
"learning_rate": 1.9973731496914914e-05,
"loss": 1.4931,
"step": 36
},
{
"epoch": 0.06,
"learning_rate": 1.9970114084673796e-05,
"loss": 1.5088,
"step": 37
},
{
"epoch": 0.06,
"learning_rate": 1.9966263770917192e-05,
"loss": 1.5093,
"step": 38
},
{
"epoch": 0.06,
"learning_rate": 1.996218064558829e-05,
"loss": 1.5074,
"step": 39
},
{
"epoch": 0.06,
"learning_rate": 1.9957864804068752e-05,
"loss": 1.4461,
"step": 40
},
{
"epoch": 0.06,
"learning_rate": 1.995331634717649e-05,
"loss": 1.4537,
"step": 41
},
{
"epoch": 0.06,
"learning_rate": 1.994853538116329e-05,
"loss": 1.4572,
"step": 42
},
{
"epoch": 0.06,
"learning_rate": 1.994352201771236e-05,
"loss": 1.4689,
"step": 43
},
{
"epoch": 0.07,
"learning_rate": 1.9938276373935688e-05,
"loss": 1.4414,
"step": 44
},
{
"epoch": 0.07,
"learning_rate": 1.993279857237133e-05,
"loss": 1.4722,
"step": 45
},
{
"epoch": 0.07,
"learning_rate": 1.992708874098054e-05,
"loss": 1.4739,
"step": 46
},
{
"epoch": 0.07,
"learning_rate": 1.9921147013144782e-05,
"loss": 1.4743,
"step": 47
},
{
"epoch": 0.07,
"learning_rate": 1.99149735276626e-05,
"loss": 1.5054,
"step": 48
},
{
"epoch": 0.07,
"learning_rate": 1.9908568428746408e-05,
"loss": 1.4858,
"step": 49
},
{
"epoch": 0.07,
"learning_rate": 1.9901931866019087e-05,
"loss": 1.3958,
"step": 50
},
{
"epoch": 0.08,
"learning_rate": 1.9895063994510512e-05,
"loss": 1.4096,
"step": 51
},
{
"epoch": 0.08,
"learning_rate": 1.988796497465392e-05,
"loss": 1.3925,
"step": 52
},
{
"epoch": 0.08,
"learning_rate": 1.9880634972282168e-05,
"loss": 1.4134,
"step": 53
},
{
"epoch": 0.08,
"learning_rate": 1.987307415862385e-05,
"loss": 1.4232,
"step": 54
},
{
"epoch": 0.08,
"learning_rate": 1.986528271029931e-05,
"loss": 1.4263,
"step": 55
},
{
"epoch": 0.08,
"learning_rate": 1.985726080931651e-05,
"loss": 1.4345,
"step": 56
},
{
"epoch": 0.08,
"learning_rate": 1.9849008643066774e-05,
"loss": 1.4655,
"step": 57
},
{
"epoch": 0.09,
"learning_rate": 1.9840526404320415e-05,
"loss": 1.4758,
"step": 58
},
{
"epoch": 0.09,
"learning_rate": 1.9831814291222233e-05,
"loss": 1.4192,
"step": 59
},
{
"epoch": 0.09,
"learning_rate": 1.982287250728689e-05,
"loss": 1.5018,
"step": 60
},
{
"epoch": 0.09,
"learning_rate": 1.9813701261394136e-05,
"loss": 1.4578,
"step": 61
},
{
"epoch": 0.09,
"learning_rate": 1.9804300767783958e-05,
"loss": 1.4346,
"step": 62
},
{
"epoch": 0.09,
"learning_rate": 1.979467124605156e-05,
"loss": 1.4475,
"step": 63
},
{
"epoch": 0.1,
"learning_rate": 1.9784812921142232e-05,
"loss": 1.4009,
"step": 64
},
{
"epoch": 0.1,
"learning_rate": 1.977472602334609e-05,
"loss": 1.4464,
"step": 65
},
{
"epoch": 0.1,
"learning_rate": 1.9764410788292724e-05,
"loss": 1.3768,
"step": 66
},
{
"epoch": 0.1,
"learning_rate": 1.9753867456945653e-05,
"loss": 1.3851,
"step": 67
},
{
"epoch": 0.1,
"learning_rate": 1.9743096275596735e-05,
"loss": 1.4259,
"step": 68
},
{
"epoch": 0.1,
"learning_rate": 1.9732097495860388e-05,
"loss": 1.4571,
"step": 69
},
{
"epoch": 0.1,
"learning_rate": 1.9720871374667714e-05,
"loss": 1.4097,
"step": 70
},
{
"epoch": 0.11,
"learning_rate": 1.9709418174260523e-05,
"loss": 1.536,
"step": 71
},
{
"epoch": 0.11,
"learning_rate": 1.9697738162185163e-05,
"loss": 1.4237,
"step": 72
},
{
"epoch": 0.11,
"learning_rate": 1.9685831611286312e-05,
"loss": 1.3861,
"step": 73
},
{
"epoch": 0.11,
"learning_rate": 1.9673698799700582e-05,
"loss": 1.4183,
"step": 74
},
{
"epoch": 0.11,
"learning_rate": 1.9661340010850025e-05,
"loss": 1.4255,
"step": 75
},
{
"epoch": 0.11,
"learning_rate": 1.9648755533435517e-05,
"loss": 1.4681,
"step": 76
},
{
"epoch": 0.11,
"learning_rate": 1.9635945661430006e-05,
"loss": 1.4336,
"step": 77
},
{
"epoch": 0.12,
"learning_rate": 1.9622910694071654e-05,
"loss": 1.4406,
"step": 78
},
{
"epoch": 0.12,
"learning_rate": 1.9609650935856847e-05,
"loss": 1.4647,
"step": 79
},
{
"epoch": 0.12,
"learning_rate": 1.9596166696533062e-05,
"loss": 1.4086,
"step": 80
},
{
"epoch": 0.12,
"learning_rate": 1.9582458291091664e-05,
"loss": 1.3972,
"step": 81
},
{
"epoch": 0.12,
"learning_rate": 1.956852603976052e-05,
"loss": 1.4239,
"step": 82
},
{
"epoch": 0.12,
"learning_rate": 1.9554370267996537e-05,
"loss": 1.433,
"step": 83
},
{
"epoch": 0.13,
"learning_rate": 1.9539991306478046e-05,
"loss": 1.4477,
"step": 84
},
{
"epoch": 0.13,
"learning_rate": 1.952538949109708e-05,
"loss": 1.4227,
"step": 85
},
{
"epoch": 0.13,
"learning_rate": 1.9510565162951538e-05,
"loss": 1.4291,
"step": 86
},
{
"epoch": 0.13,
"learning_rate": 1.9495518668337204e-05,
"loss": 1.3841,
"step": 87
},
{
"epoch": 0.13,
"learning_rate": 1.9480250358739667e-05,
"loss": 1.3668,
"step": 88
},
{
"epoch": 0.13,
"learning_rate": 1.94647605908261e-05,
"loss": 1.4258,
"step": 89
},
{
"epoch": 0.13,
"learning_rate": 1.944904972643694e-05,
"loss": 1.4438,
"step": 90
},
{
"epoch": 0.14,
"learning_rate": 1.9433118132577432e-05,
"loss": 1.4337,
"step": 91
},
{
"epoch": 0.14,
"learning_rate": 1.9416966181409047e-05,
"loss": 1.4111,
"step": 92
},
{
"epoch": 0.14,
"learning_rate": 1.94005942502408e-05,
"loss": 1.4473,
"step": 93
},
{
"epoch": 0.14,
"learning_rate": 1.9384002721520423e-05,
"loss": 1.3321,
"step": 94
},
{
"epoch": 0.14,
"learning_rate": 1.936719198282545e-05,
"loss": 1.3896,
"step": 95
},
{
"epoch": 0.14,
"learning_rate": 1.9350162426854152e-05,
"loss": 1.3459,
"step": 96
},
{
"epoch": 0.14,
"learning_rate": 1.933291445141635e-05,
"loss": 1.4044,
"step": 97
},
{
"epoch": 0.15,
"learning_rate": 1.931544845942415e-05,
"loss": 1.4181,
"step": 98
},
{
"epoch": 0.15,
"learning_rate": 1.9297764858882516e-05,
"loss": 1.4039,
"step": 99
},
{
"epoch": 0.15,
"learning_rate": 1.927986406287973e-05,
"loss": 1.3454,
"step": 100
},
{
"epoch": 0.15,
"learning_rate": 1.9261746489577767e-05,
"loss": 1.406,
"step": 101
},
{
"epoch": 0.15,
"learning_rate": 1.92434125622025e-05,
"loss": 1.431,
"step": 102
},
{
"epoch": 0.15,
"learning_rate": 1.9224862709033823e-05,
"loss": 1.3683,
"step": 103
},
{
"epoch": 0.15,
"learning_rate": 1.9206097363395668e-05,
"loss": 1.3528,
"step": 104
},
{
"epoch": 0.16,
"learning_rate": 1.9187116963645845e-05,
"loss": 1.4111,
"step": 105
},
{
"epoch": 0.16,
"learning_rate": 1.9167921953165827e-05,
"loss": 1.3924,
"step": 106
},
{
"epoch": 0.16,
"learning_rate": 1.9148512780350384e-05,
"loss": 1.3701,
"step": 107
},
{
"epoch": 0.16,
"learning_rate": 1.9128889898597117e-05,
"loss": 1.327,
"step": 108
},
{
"epoch": 0.16,
"learning_rate": 1.910905376629585e-05,
"loss": 1.3746,
"step": 109
},
{
"epoch": 0.16,
"learning_rate": 1.9089004846817947e-05,
"loss": 1.3469,
"step": 110
},
{
"epoch": 0.17,
"learning_rate": 1.9068743608505454e-05,
"loss": 1.3765,
"step": 111
},
{
"epoch": 0.17,
"learning_rate": 1.9048270524660197e-05,
"loss": 1.4148,
"step": 112
},
{
"epoch": 0.17,
"learning_rate": 1.902758607353269e-05,
"loss": 1.4208,
"step": 113
},
{
"epoch": 0.17,
"learning_rate": 1.9006690738310988e-05,
"loss": 1.3988,
"step": 114
},
{
"epoch": 0.17,
"learning_rate": 1.898558500710939e-05,
"loss": 1.3937,
"step": 115
},
{
"epoch": 0.17,
"learning_rate": 1.896426937295704e-05,
"loss": 1.4248,
"step": 116
},
{
"epoch": 0.17,
"learning_rate": 1.89427443337864e-05,
"loss": 1.3625,
"step": 117
},
{
"epoch": 0.18,
"learning_rate": 1.8921010392421628e-05,
"loss": 1.3566,
"step": 118
},
{
"epoch": 0.18,
"learning_rate": 1.889906805656684e-05,
"loss": 1.3763,
"step": 119
},
{
"epoch": 0.18,
"learning_rate": 1.8876917838794226e-05,
"loss": 1.4015,
"step": 120
},
{
"epoch": 0.18,
"learning_rate": 1.8854560256532098e-05,
"loss": 1.3169,
"step": 121
},
{
"epoch": 0.18,
"learning_rate": 1.8831995832052802e-05,
"loss": 1.4215,
"step": 122
},
{
"epoch": 0.18,
"learning_rate": 1.8809225092460488e-05,
"loss": 1.4371,
"step": 123
},
{
"epoch": 0.18,
"learning_rate": 1.8786248569678847e-05,
"loss": 1.4199,
"step": 124
},
{
"epoch": 0.19,
"learning_rate": 1.8763066800438638e-05,
"loss": 1.3484,
"step": 125
},
{
"epoch": 0.19,
"learning_rate": 1.873968032626518e-05,
"loss": 1.3701,
"step": 126
},
{
"epoch": 0.19,
"learning_rate": 1.8716089693465696e-05,
"loss": 1.359,
"step": 127
},
{
"epoch": 0.19,
"learning_rate": 1.869229545311653e-05,
"loss": 1.3674,
"step": 128
},
{
"epoch": 0.19,
"learning_rate": 1.8668298161050308e-05,
"loss": 1.3531,
"step": 129
},
{
"epoch": 0.19,
"learning_rate": 1.8644098377842934e-05,
"loss": 1.3539,
"step": 130
},
{
"epoch": 0.19,
"learning_rate": 1.8619696668800494e-05,
"loss": 1.351,
"step": 131
},
{
"epoch": 0.2,
"learning_rate": 1.8595093603946053e-05,
"loss": 1.4147,
"step": 132
},
{
"epoch": 0.2,
"learning_rate": 1.8570289758006346e-05,
"loss": 1.4284,
"step": 133
},
{
"epoch": 0.2,
"learning_rate": 1.8545285710398343e-05,
"loss": 1.3313,
"step": 134
},
{
"epoch": 0.2,
"learning_rate": 1.852008204521572e-05,
"loss": 1.3986,
"step": 135
},
{
"epoch": 0.2,
"learning_rate": 1.8494679351215212e-05,
"loss": 1.3815,
"step": 136
},
{
"epoch": 0.2,
"learning_rate": 1.846907822180286e-05,
"loss": 1.4004,
"step": 137
},
{
"epoch": 0.21,
"learning_rate": 1.8443279255020153e-05,
"loss": 1.3541,
"step": 138
},
{
"epoch": 0.21,
"learning_rate": 1.8417283053530047e-05,
"loss": 1.3502,
"step": 139
},
{
"epoch": 0.21,
"learning_rate": 1.8391090224602895e-05,
"loss": 1.4261,
"step": 140
},
{
"epoch": 0.21,
"learning_rate": 1.8364701380102267e-05,
"loss": 1.3425,
"step": 141
},
{
"epoch": 0.21,
"learning_rate": 1.8338117136470648e-05,
"loss": 1.3581,
"step": 142
},
{
"epoch": 0.21,
"learning_rate": 1.831133811471503e-05,
"loss": 1.3936,
"step": 143
},
{
"epoch": 0.21,
"learning_rate": 1.8284364940392426e-05,
"loss": 1.2922,
"step": 144
},
{
"epoch": 0.22,
"learning_rate": 1.825719824359524e-05,
"loss": 1.4399,
"step": 145
},
{
"epoch": 0.22,
"learning_rate": 1.8229838658936566e-05,
"loss": 1.425,
"step": 146
},
{
"epoch": 0.22,
"learning_rate": 1.820228682553533e-05,
"loss": 1.376,
"step": 147
},
{
"epoch": 0.22,
"learning_rate": 1.8174543387001403e-05,
"loss": 1.3611,
"step": 148
},
{
"epoch": 0.22,
"learning_rate": 1.8146608991420533e-05,
"loss": 1.2964,
"step": 149
},
{
"epoch": 0.22,
"learning_rate": 1.811848429133922e-05,
"loss": 1.3837,
"step": 150
},
{
"epoch": 0.22,
"learning_rate": 1.8090169943749477e-05,
"loss": 1.3623,
"step": 151
},
{
"epoch": 0.23,
"learning_rate": 1.8061666610073465e-05,
"loss": 1.417,
"step": 152
},
{
"epoch": 0.23,
"learning_rate": 1.8032974956148064e-05,
"loss": 1.3424,
"step": 153
},
{
"epoch": 0.23,
"learning_rate": 1.8004095652209304e-05,
"loss": 1.3589,
"step": 154
},
{
"epoch": 0.23,
"learning_rate": 1.7975029372876706e-05,
"loss": 1.3266,
"step": 155
},
{
"epoch": 0.23,
"learning_rate": 1.7945776797137544e-05,
"loss": 1.412,
"step": 156
},
{
"epoch": 0.23,
"learning_rate": 1.791633860833096e-05,
"loss": 1.3824,
"step": 157
},
{
"epoch": 0.24,
"learning_rate": 1.7886715494132008e-05,
"loss": 1.331,
"step": 158
},
{
"epoch": 0.24,
"learning_rate": 1.7856908146535602e-05,
"loss": 1.3412,
"step": 159
},
{
"epoch": 0.24,
"learning_rate": 1.7826917261840337e-05,
"loss": 1.3855,
"step": 160
},
{
"epoch": 0.24,
"learning_rate": 1.7796743540632226e-05,
"loss": 1.3392,
"step": 161
},
{
"epoch": 0.24,
"learning_rate": 1.7766387687768338e-05,
"loss": 1.3154,
"step": 162
},
{
"epoch": 0.24,
"learning_rate": 1.7735850412360332e-05,
"loss": 1.4047,
"step": 163
},
{
"epoch": 0.24,
"learning_rate": 1.7705132427757895e-05,
"loss": 1.3719,
"step": 164
},
{
"epoch": 0.25,
"learning_rate": 1.7674234451532065e-05,
"loss": 1.3858,
"step": 165
},
{
"epoch": 0.25,
"learning_rate": 1.7643157205458483e-05,
"loss": 1.372,
"step": 166
},
{
"epoch": 0.25,
"learning_rate": 1.7611901415500536e-05,
"loss": 1.3725,
"step": 167
},
{
"epoch": 0.25,
"learning_rate": 1.7580467811792374e-05,
"loss": 1.3229,
"step": 168
},
{
"epoch": 0.25,
"learning_rate": 1.7548857128621878e-05,
"loss": 1.3614,
"step": 169
},
{
"epoch": 0.25,
"learning_rate": 1.7517070104413497e-05,
"loss": 1.3202,
"step": 170
},
{
"epoch": 0.25,
"learning_rate": 1.7485107481711014e-05,
"loss": 1.328,
"step": 171
},
{
"epoch": 0.26,
"learning_rate": 1.745297000716016e-05,
"loss": 1.4035,
"step": 172
},
{
"epoch": 0.26,
"learning_rate": 1.7420658431491224e-05,
"loss": 1.3921,
"step": 173
},
{
"epoch": 0.26,
"learning_rate": 1.7388173509501475e-05,
"loss": 1.3991,
"step": 174
},
{
"epoch": 0.26,
"learning_rate": 1.7355516000037555e-05,
"loss": 1.3446,
"step": 175
},
{
"epoch": 0.26,
"learning_rate": 1.7322686665977738e-05,
"loss": 1.3412,
"step": 176
},
{
"epoch": 0.26,
"learning_rate": 1.7289686274214116e-05,
"loss": 1.3459,
"step": 177
},
{
"epoch": 0.26,
"learning_rate": 1.7256515595634688e-05,
"loss": 1.3381,
"step": 178
},
{
"epoch": 0.27,
"learning_rate": 1.722317540510534e-05,
"loss": 1.3947,
"step": 179
},
{
"epoch": 0.27,
"learning_rate": 1.7189666481451755e-05,
"loss": 1.3186,
"step": 180
},
{
"epoch": 0.27,
"learning_rate": 1.715598960744121e-05,
"loss": 1.3873,
"step": 181
},
{
"epoch": 0.27,
"learning_rate": 1.712214556976431e-05,
"loss": 1.2937,
"step": 182
},
{
"epoch": 0.27,
"learning_rate": 1.7088135159016584e-05,
"loss": 1.3279,
"step": 183
},
{
"epoch": 0.27,
"learning_rate": 1.7053959169680033e-05,
"loss": 1.4129,
"step": 184
},
{
"epoch": 0.28,
"learning_rate": 1.7019618400104572e-05,
"loss": 1.2923,
"step": 185
},
{
"epoch": 0.28,
"learning_rate": 1.6985113652489374e-05,
"loss": 1.3812,
"step": 186
},
{
"epoch": 0.28,
"learning_rate": 1.695044573286413e-05,
"loss": 1.3207,
"step": 187
},
{
"epoch": 0.28,
"learning_rate": 1.6915615451070234e-05,
"loss": 1.3297,
"step": 188
},
{
"epoch": 0.28,
"learning_rate": 1.6880623620741843e-05,
"loss": 1.3116,
"step": 189
},
{
"epoch": 0.28,
"learning_rate": 1.684547105928689e-05,
"loss": 1.3302,
"step": 190
},
{
"epoch": 0.28,
"learning_rate": 1.6810158587867973e-05,
"loss": 1.3288,
"step": 191
},
{
"epoch": 0.29,
"learning_rate": 1.677468703138319e-05,
"loss": 1.3685,
"step": 192
},
{
"epoch": 0.29,
"learning_rate": 1.673905721844686e-05,
"loss": 1.3432,
"step": 193
},
{
"epoch": 0.29,
"learning_rate": 1.670326998137016e-05,
"loss": 1.3562,
"step": 194
},
{
"epoch": 0.29,
"learning_rate": 1.666732615614169e-05,
"loss": 1.2824,
"step": 195
},
{
"epoch": 0.29,
"learning_rate": 1.6631226582407954e-05,
"loss": 1.3672,
"step": 196
},
{
"epoch": 0.29,
"learning_rate": 1.6594972103453727e-05,
"loss": 1.3745,
"step": 197
},
{
"epoch": 0.29,
"learning_rate": 1.6558563566182365e-05,
"loss": 1.3037,
"step": 198
},
{
"epoch": 0.3,
"learning_rate": 1.652200182109602e-05,
"loss": 1.3838,
"step": 199
},
{
"epoch": 0.3,
"learning_rate": 1.6485287722275783e-05,
"loss": 1.2685,
"step": 200
},
{
"epoch": 0.3,
"learning_rate": 1.6448422127361707e-05,
"loss": 1.3409,
"step": 201
},
{
"epoch": 0.3,
"learning_rate": 1.64114058975328e-05,
"loss": 1.2576,
"step": 202
},
{
"epoch": 0.3,
"learning_rate": 1.63742398974869e-05,
"loss": 1.336,
"step": 203
},
{
"epoch": 0.3,
"learning_rate": 1.6336924995420453e-05,
"loss": 1.3551,
"step": 204
},
{
"epoch": 0.31,
"learning_rate": 1.6299462063008272e-05,
"loss": 1.2506,
"step": 205
},
{
"epoch": 0.31,
"learning_rate": 1.626185197538314e-05,
"loss": 1.3776,
"step": 206
},
{
"epoch": 0.31,
"learning_rate": 1.6224095611115385e-05,
"loss": 1.3258,
"step": 207
},
{
"epoch": 0.31,
"learning_rate": 1.6186193852192356e-05,
"loss": 1.3096,
"step": 208
},
{
"epoch": 0.31,
"learning_rate": 1.6148147583997813e-05,
"loss": 1.3984,
"step": 209
},
{
"epoch": 0.31,
"learning_rate": 1.6109957695291246e-05,
"loss": 1.3572,
"step": 210
},
{
"epoch": 0.31,
"learning_rate": 1.6071625078187113e-05,
"loss": 1.2951,
"step": 211
},
{
"epoch": 0.32,
"learning_rate": 1.603315062813401e-05,
"loss": 1.3418,
"step": 212
},
{
"epoch": 0.32,
"learning_rate": 1.5994535243893742e-05,
"loss": 1.3248,
"step": 213
},
{
"epoch": 0.32,
"learning_rate": 1.5955779827520327e-05,
"loss": 1.3535,
"step": 214
},
{
"epoch": 0.32,
"learning_rate": 1.5916885284338937e-05,
"loss": 1.3711,
"step": 215
},
{
"epoch": 0.32,
"learning_rate": 1.5877852522924733e-05,
"loss": 1.3681,
"step": 216
},
{
"epoch": 0.32,
"learning_rate": 1.5838682455081657e-05,
"loss": 1.3347,
"step": 217
},
{
"epoch": 0.32,
"learning_rate": 1.5799375995821116e-05,
"loss": 1.3638,
"step": 218
},
{
"epoch": 0.33,
"learning_rate": 1.5759934063340627e-05,
"loss": 1.31,
"step": 219
},
{
"epoch": 0.33,
"learning_rate": 1.5720357579002346e-05,
"loss": 1.3567,
"step": 220
},
{
"epoch": 0.33,
"learning_rate": 1.568064746731156e-05,
"loss": 1.3835,
"step": 221
},
{
"epoch": 0.33,
"learning_rate": 1.5640804655895086e-05,
"loss": 1.291,
"step": 222
},
{
"epoch": 0.33,
"learning_rate": 1.5600830075479604e-05,
"loss": 1.3224,
"step": 223
},
{
"epoch": 0.33,
"learning_rate": 1.5560724659869905e-05,
"loss": 1.3499,
"step": 224
},
{
"epoch": 0.33,
"learning_rate": 1.5520489345927095e-05,
"loss": 1.3619,
"step": 225
},
{
"epoch": 0.34,
"learning_rate": 1.5480125073546705e-05,
"loss": 1.2996,
"step": 226
},
{
"epoch": 0.34,
"learning_rate": 1.5439632785636707e-05,
"loss": 1.3226,
"step": 227
},
{
"epoch": 0.34,
"learning_rate": 1.539901342809554e-05,
"loss": 1.3902,
"step": 228
},
{
"epoch": 0.34,
"learning_rate": 1.5358267949789968e-05,
"loss": 1.3496,
"step": 229
},
{
"epoch": 0.34,
"learning_rate": 1.5317397302532933e-05,
"loss": 1.311,
"step": 230
},
{
"epoch": 0.34,
"learning_rate": 1.527640244106133e-05,
"loss": 1.3254,
"step": 231
},
{
"epoch": 0.35,
"learning_rate": 1.5235284323013674e-05,
"loss": 1.3453,
"step": 232
},
{
"epoch": 0.35,
"learning_rate": 1.5194043908907774e-05,
"loss": 1.3137,
"step": 233
},
{
"epoch": 0.35,
"learning_rate": 1.515268216211825e-05,
"loss": 1.3203,
"step": 234
},
{
"epoch": 0.35,
"learning_rate": 1.5111200048854055e-05,
"loss": 1.3282,
"step": 235
},
{
"epoch": 0.35,
"learning_rate": 1.5069598538135905e-05,
"loss": 1.3457,
"step": 236
},
{
"epoch": 0.35,
"learning_rate": 1.5027878601773633e-05,
"loss": 1.2971,
"step": 237
},
{
"epoch": 0.35,
"learning_rate": 1.4986041214343487e-05,
"loss": 1.3374,
"step": 238
},
{
"epoch": 0.36,
"learning_rate": 1.494408735316537e-05,
"loss": 1.3082,
"step": 239
},
{
"epoch": 0.36,
"learning_rate": 1.490201799828001e-05,
"loss": 1.3323,
"step": 240
},
{
"epoch": 0.36,
"learning_rate": 1.485983413242606e-05,
"loss": 1.3553,
"step": 241
},
{
"epoch": 0.36,
"learning_rate": 1.4817536741017153e-05,
"loss": 1.3666,
"step": 242
},
{
"epoch": 0.36,
"learning_rate": 1.4775126812118865e-05,
"loss": 1.3257,
"step": 243
},
{
"epoch": 0.36,
"learning_rate": 1.4732605336425651e-05,
"loss": 1.3477,
"step": 244
},
{
"epoch": 0.36,
"learning_rate": 1.4689973307237687e-05,
"loss": 1.3321,
"step": 245
},
{
"epoch": 0.37,
"learning_rate": 1.4647231720437687e-05,
"loss": 1.363,
"step": 246
},
{
"epoch": 0.37,
"learning_rate": 1.4604381574467616e-05,
"loss": 1.35,
"step": 247
},
{
"epoch": 0.37,
"learning_rate": 1.4561423870305383e-05,
"loss": 1.3416,
"step": 248
},
{
"epoch": 0.37,
"learning_rate": 1.4518359611441452e-05,
"loss": 1.3453,
"step": 249
},
{
"epoch": 0.37,
"learning_rate": 1.4475189803855399e-05,
"loss": 1.3792,
"step": 250
},
{
"epoch": 0.37,
"learning_rate": 1.4431915455992416e-05,
"loss": 1.291,
"step": 251
},
{
"epoch": 0.38,
"learning_rate": 1.438853757873975e-05,
"loss": 1.3631,
"step": 252
},
{
"epoch": 0.38,
"learning_rate": 1.43450571854031e-05,
"loss": 1.299,
"step": 253
},
{
"epoch": 0.38,
"learning_rate": 1.430147529168292e-05,
"loss": 1.2753,
"step": 254
},
{
"epoch": 0.38,
"learning_rate": 1.4257792915650728e-05,
"loss": 1.3002,
"step": 255
},
{
"epoch": 0.38,
"learning_rate": 1.4214011077725293e-05,
"loss": 1.3069,
"step": 256
},
{
"epoch": 0.38,
"learning_rate": 1.4170130800648814e-05,
"loss": 1.3388,
"step": 257
},
{
"epoch": 0.38,
"learning_rate": 1.4126153109463025e-05,
"loss": 1.3223,
"step": 258
},
{
"epoch": 0.39,
"learning_rate": 1.4082079031485253e-05,
"loss": 1.3053,
"step": 259
},
{
"epoch": 0.39,
"learning_rate": 1.403790959628441e-05,
"loss": 1.3334,
"step": 260
},
{
"epoch": 0.39,
"learning_rate": 1.3993645835656955e-05,
"loss": 1.2915,
"step": 261
},
{
"epoch": 0.39,
"learning_rate": 1.394928878360279e-05,
"loss": 1.3546,
"step": 262
},
{
"epoch": 0.39,
"learning_rate": 1.3904839476301091e-05,
"loss": 1.3055,
"step": 263
},
{
"epoch": 0.39,
"learning_rate": 1.3860298952086118e-05,
"loss": 1.3464,
"step": 264
},
{
"epoch": 0.39,
"learning_rate": 1.3815668251422953e-05,
"loss": 1.3684,
"step": 265
},
{
"epoch": 0.4,
"learning_rate": 1.3770948416883205e-05,
"loss": 1.3457,
"step": 266
},
{
"epoch": 0.4,
"learning_rate": 1.3726140493120639e-05,
"loss": 1.373,
"step": 267
},
{
"epoch": 0.4,
"learning_rate": 1.3681245526846782e-05,
"loss": 1.2898,
"step": 268
},
{
"epoch": 0.4,
"learning_rate": 1.3636264566806473e-05,
"loss": 1.3207,
"step": 269
},
{
"epoch": 0.4,
"learning_rate": 1.3591198663753358e-05,
"loss": 1.3466,
"step": 270
},
{
"epoch": 0.4,
"learning_rate": 1.3546048870425356e-05,
"loss": 1.3006,
"step": 271
},
{
"epoch": 0.4,
"learning_rate": 1.3500816241520059e-05,
"loss": 1.3772,
"step": 272
},
{
"epoch": 0.41,
"learning_rate": 1.3455501833670089e-05,
"loss": 1.3406,
"step": 273
},
{
"epoch": 0.41,
"learning_rate": 1.3410106705418424e-05,
"loss": 1.2943,
"step": 274
},
{
"epoch": 0.41,
"learning_rate": 1.3364631917193671e-05,
"loss": 1.2715,
"step": 275
},
{
"epoch": 0.41,
"learning_rate": 1.3319078531285286e-05,
"loss": 1.2483,
"step": 276
},
{
"epoch": 0.41,
"learning_rate": 1.3273447611818768e-05,
"loss": 1.3117,
"step": 277
},
{
"epoch": 0.41,
"learning_rate": 1.3227740224730799e-05,
"loss": 1.2942,
"step": 278
},
{
"epoch": 0.42,
"learning_rate": 1.3181957437744333e-05,
"loss": 1.303,
"step": 279
},
{
"epoch": 0.42,
"learning_rate": 1.3136100320343674e-05,
"loss": 1.3261,
"step": 280
},
{
"epoch": 0.42,
"learning_rate": 1.3090169943749475e-05,
"loss": 1.2932,
"step": 281
},
{
"epoch": 0.42,
"learning_rate": 1.3044167380893726e-05,
"loss": 1.2304,
"step": 282
},
{
"epoch": 0.42,
"learning_rate": 1.2998093706394676e-05,
"loss": 1.3353,
"step": 283
},
{
"epoch": 0.42,
"learning_rate": 1.295194999653175e-05,
"loss": 1.2972,
"step": 284
},
{
"epoch": 0.42,
"learning_rate": 1.2905737329220394e-05,
"loss": 1.2799,
"step": 285
},
{
"epoch": 0.43,
"learning_rate": 1.2859456783986892e-05,
"loss": 1.3691,
"step": 286
},
{
"epoch": 0.43,
"learning_rate": 1.2813109441943166e-05,
"loss": 1.2723,
"step": 287
},
{
"epoch": 0.43,
"learning_rate": 1.2766696385761494e-05,
"loss": 1.2673,
"step": 288
},
{
"epoch": 0.43,
"learning_rate": 1.2720218699649243e-05,
"loss": 1.3167,
"step": 289
},
{
"epoch": 0.43,
"learning_rate": 1.2673677469323532e-05,
"loss": 1.3344,
"step": 290
},
{
"epoch": 0.43,
"learning_rate": 1.262707378198587e-05,
"loss": 1.2612,
"step": 291
},
{
"epoch": 0.43,
"learning_rate": 1.258040872629676e-05,
"loss": 1.3137,
"step": 292
},
{
"epoch": 0.44,
"learning_rate": 1.2533683392350264e-05,
"loss": 1.3172,
"step": 293
},
{
"epoch": 0.44,
"learning_rate": 1.2486898871648552e-05,
"loss": 1.3587,
"step": 294
},
{
"epoch": 0.44,
"learning_rate": 1.2440056257076376e-05,
"loss": 1.3483,
"step": 295
},
{
"epoch": 0.44,
"learning_rate": 1.2393156642875579e-05,
"loss": 1.333,
"step": 296
},
{
"epoch": 0.44,
"learning_rate": 1.2346201124619502e-05,
"loss": 1.3102,
"step": 297
},
{
"epoch": 0.44,
"learning_rate": 1.2299190799187405e-05,
"loss": 1.3282,
"step": 298
},
{
"epoch": 0.45,
"learning_rate": 1.2252126764738845e-05,
"loss": 1.3333,
"step": 299
},
{
"epoch": 0.45,
"learning_rate": 1.2205010120688012e-05,
"loss": 1.2662,
"step": 300
},
{
"epoch": 0.45,
"learning_rate": 1.2157841967678064e-05,
"loss": 1.3086,
"step": 301
},
{
"epoch": 0.45,
"learning_rate": 1.2110623407555398e-05,
"loss": 1.2812,
"step": 302
},
{
"epoch": 0.45,
"learning_rate": 1.2063355543343925e-05,
"loss": 1.3071,
"step": 303
},
{
"epoch": 0.45,
"learning_rate": 1.2016039479219293e-05,
"loss": 1.2932,
"step": 304
},
{
"epoch": 0.45,
"learning_rate": 1.1968676320483103e-05,
"loss": 1.3059,
"step": 305
},
{
"epoch": 0.46,
"learning_rate": 1.1921267173537085e-05,
"loss": 1.2301,
"step": 306
},
{
"epoch": 0.46,
"learning_rate": 1.187381314585725e-05,
"loss": 1.3119,
"step": 307
},
{
"epoch": 0.46,
"learning_rate": 1.1826315345968014e-05,
"loss": 1.3146,
"step": 308
},
{
"epoch": 0.46,
"learning_rate": 1.1778774883416325e-05,
"loss": 1.3019,
"step": 309
},
{
"epoch": 0.46,
"learning_rate": 1.1731192868745716e-05,
"loss": 1.3268,
"step": 310
},
{
"epoch": 0.46,
"learning_rate": 1.1683570413470384e-05,
"loss": 1.3823,
"step": 311
},
{
"epoch": 0.46,
"learning_rate": 1.163590863004922e-05,
"loss": 1.2614,
"step": 312
},
{
"epoch": 0.47,
"learning_rate": 1.1588208631859808e-05,
"loss": 1.3186,
"step": 313
},
{
"epoch": 0.47,
"learning_rate": 1.154047153317243e-05,
"loss": 1.3654,
"step": 314
},
{
"epoch": 0.47,
"learning_rate": 1.1492698449124042e-05,
"loss": 1.2827,
"step": 315
},
{
"epoch": 0.47,
"learning_rate": 1.1444890495692214e-05,
"loss": 1.3103,
"step": 316
},
{
"epoch": 0.47,
"learning_rate": 1.1397048789669061e-05,
"loss": 1.2942,
"step": 317
},
{
"epoch": 0.47,
"learning_rate": 1.1349174448635158e-05,
"loss": 1.3167,
"step": 318
},
{
"epoch": 0.47,
"learning_rate": 1.1301268590933434e-05,
"loss": 1.2643,
"step": 319
},
{
"epoch": 0.48,
"learning_rate": 1.1253332335643043e-05,
"loss": 1.3429,
"step": 320
},
{
"epoch": 0.48,
"learning_rate": 1.1205366802553231e-05,
"loss": 1.2444,
"step": 321
},
{
"epoch": 0.48,
"learning_rate": 1.1157373112137171e-05,
"loss": 1.2624,
"step": 322
},
{
"epoch": 0.48,
"learning_rate": 1.1109352385525782e-05,
"loss": 1.2937,
"step": 323
},
{
"epoch": 0.48,
"learning_rate": 1.106130574448156e-05,
"loss": 1.2665,
"step": 324
},
{
"epoch": 0.48,
"learning_rate": 1.1013234311372353e-05,
"loss": 1.2902,
"step": 325
},
{
"epoch": 0.49,
"learning_rate": 1.0965139209145153e-05,
"loss": 1.2627,
"step": 326
},
{
"epoch": 0.49,
"learning_rate": 1.0917021561299864e-05,
"loss": 1.2962,
"step": 327
},
{
"epoch": 0.49,
"learning_rate": 1.0868882491863048e-05,
"loss": 1.304,
"step": 328
},
{
"epoch": 0.49,
"learning_rate": 1.0820723125361685e-05,
"loss": 1.35,
"step": 329
},
{
"epoch": 0.49,
"learning_rate": 1.077254458679689e-05,
"loss": 1.3037,
"step": 330
},
{
"epoch": 0.49,
"learning_rate": 1.0724348001617626e-05,
"loss": 1.3405,
"step": 331
},
{
"epoch": 0.49,
"learning_rate": 1.0676134495694439e-05,
"loss": 1.3029,
"step": 332
},
{
"epoch": 0.5,
"learning_rate": 1.0627905195293135e-05,
"loss": 1.3236,
"step": 333
},
{
"epoch": 0.5,
"learning_rate": 1.0579661227048484e-05,
"loss": 1.3413,
"step": 334
},
{
"epoch": 0.5,
"learning_rate": 1.0531403717937888e-05,
"loss": 1.2775,
"step": 335
},
{
"epoch": 0.5,
"learning_rate": 1.0483133795255072e-05,
"loss": 1.2882,
"step": 336
},
{
"epoch": 0.5,
"learning_rate": 1.0434852586583737e-05,
"loss": 1.2673,
"step": 337
},
{
"epoch": 0.5,
"learning_rate": 1.0386561219771222e-05,
"loss": 1.261,
"step": 338
},
{
"epoch": 0.5,
"learning_rate": 1.0338260822902166e-05,
"loss": 1.2897,
"step": 339
},
{
"epoch": 0.51,
"learning_rate": 1.0289952524272147e-05,
"loss": 1.2824,
"step": 340
},
{
"epoch": 0.51,
"learning_rate": 1.0241637452361323e-05,
"loss": 1.2769,
"step": 341
},
{
"epoch": 0.51,
"learning_rate": 1.0193316735808085e-05,
"loss": 1.3164,
"step": 342
},
{
"epoch": 0.51,
"learning_rate": 1.0144991503382676e-05,
"loss": 1.3313,
"step": 343
},
{
"epoch": 0.51,
"learning_rate": 1.0096662883960833e-05,
"loss": 1.3318,
"step": 344
},
{
"epoch": 0.51,
"learning_rate": 1.0048332006497406e-05,
"loss": 1.2491,
"step": 345
},
{
"epoch": 0.51,
"learning_rate": 1e-05,
"loss": 1.271,
"step": 346
},
{
"epoch": 0.52,
"learning_rate": 9.951667993502599e-06,
"loss": 1.2924,
"step": 347
},
{
"epoch": 0.52,
"learning_rate": 9.903337116039172e-06,
"loss": 1.307,
"step": 348
},
{
"epoch": 0.52,
"learning_rate": 9.855008496617326e-06,
"loss": 1.297,
"step": 349
},
{
"epoch": 0.52,
"learning_rate": 9.806683264191916e-06,
"loss": 1.2918,
"step": 350
},
{
"epoch": 0.52,
"learning_rate": 9.75836254763868e-06,
"loss": 1.3302,
"step": 351
},
{
"epoch": 0.52,
"learning_rate": 9.710047475727854e-06,
"loss": 1.2808,
"step": 352
},
{
"epoch": 0.53,
"learning_rate": 9.661739177097836e-06,
"loss": 1.2293,
"step": 353
},
{
"epoch": 0.53,
"learning_rate": 9.613438780228777e-06,
"loss": 1.2635,
"step": 354
},
{
"epoch": 0.53,
"learning_rate": 9.565147413416266e-06,
"loss": 1.3211,
"step": 355
},
{
"epoch": 0.53,
"learning_rate": 9.516866204744932e-06,
"loss": 1.3137,
"step": 356
},
{
"epoch": 0.53,
"learning_rate": 9.468596282062114e-06,
"loss": 1.3195,
"step": 357
},
{
"epoch": 0.53,
"learning_rate": 9.420338772951521e-06,
"loss": 1.2673,
"step": 358
},
{
"epoch": 0.53,
"learning_rate": 9.372094804706867e-06,
"loss": 1.2974,
"step": 359
},
{
"epoch": 0.54,
"learning_rate": 9.323865504305566e-06,
"loss": 1.2973,
"step": 360
},
{
"epoch": 0.54,
"learning_rate": 9.275651998382377e-06,
"loss": 1.3248,
"step": 361
},
{
"epoch": 0.54,
"learning_rate": 9.227455413203115e-06,
"loss": 1.2908,
"step": 362
},
{
"epoch": 0.54,
"learning_rate": 9.179276874638315e-06,
"loss": 1.2986,
"step": 363
},
{
"epoch": 0.54,
"learning_rate": 9.131117508136953e-06,
"loss": 1.267,
"step": 364
},
{
"epoch": 0.54,
"learning_rate": 9.082978438700138e-06,
"loss": 1.2513,
"step": 365
},
{
"epoch": 0.54,
"learning_rate": 9.034860790854848e-06,
"loss": 1.3122,
"step": 366
},
{
"epoch": 0.55,
"learning_rate": 8.986765688627652e-06,
"loss": 1.2326,
"step": 367
},
{
"epoch": 0.55,
"learning_rate": 8.938694255518444e-06,
"loss": 1.3432,
"step": 368
},
{
"epoch": 0.55,
"learning_rate": 8.890647614474223e-06,
"loss": 1.245,
"step": 369
},
{
"epoch": 0.55,
"learning_rate": 8.842626887862832e-06,
"loss": 1.2181,
"step": 370
},
{
"epoch": 0.55,
"learning_rate": 8.79463319744677e-06,
"loss": 1.2605,
"step": 371
},
{
"epoch": 0.55,
"learning_rate": 8.746667664356957e-06,
"loss": 1.2901,
"step": 372
},
{
"epoch": 0.56,
"learning_rate": 8.69873140906657e-06,
"loss": 1.2793,
"step": 373
},
{
"epoch": 0.56,
"learning_rate": 8.650825551364844e-06,
"loss": 1.2719,
"step": 374
},
{
"epoch": 0.56,
"learning_rate": 8.602951210330942e-06,
"loss": 1.1598,
"step": 375
},
{
"epoch": 0.56,
"learning_rate": 8.55510950430779e-06,
"loss": 1.3019,
"step": 376
},
{
"epoch": 0.56,
"learning_rate": 8.50730155087596e-06,
"loss": 1.2954,
"step": 377
},
{
"epoch": 0.56,
"learning_rate": 8.459528466827576e-06,
"loss": 1.28,
"step": 378
},
{
"epoch": 0.56,
"learning_rate": 8.411791368140197e-06,
"loss": 1.2873,
"step": 379
},
{
"epoch": 0.57,
"learning_rate": 8.364091369950783e-06,
"loss": 1.3165,
"step": 380
},
{
"epoch": 0.57,
"learning_rate": 8.316429586529616e-06,
"loss": 1.2493,
"step": 381
},
{
"epoch": 0.57,
"learning_rate": 8.268807131254288e-06,
"loss": 1.3167,
"step": 382
},
{
"epoch": 0.57,
"learning_rate": 8.221225116583677e-06,
"loss": 1.264,
"step": 383
},
{
"epoch": 0.57,
"learning_rate": 8.17368465403199e-06,
"loss": 1.2593,
"step": 384
},
{
"epoch": 0.57,
"learning_rate": 8.126186854142752e-06,
"loss": 1.303,
"step": 385
},
{
"epoch": 0.57,
"learning_rate": 8.078732826462917e-06,
"loss": 1.2191,
"step": 386
},
{
"epoch": 0.58,
"learning_rate": 8.0313236795169e-06,
"loss": 1.2881,
"step": 387
},
{
"epoch": 0.58,
"learning_rate": 7.983960520780712e-06,
"loss": 1.2278,
"step": 388
},
{
"epoch": 0.58,
"learning_rate": 7.936644456656082e-06,
"loss": 1.2647,
"step": 389
},
{
"epoch": 0.58,
"learning_rate": 7.889376592444605e-06,
"loss": 1.2423,
"step": 390
},
{
"epoch": 0.58,
"learning_rate": 7.84215803232194e-06,
"loss": 1.2385,
"step": 391
},
{
"epoch": 0.58,
"learning_rate": 7.794989879311991e-06,
"loss": 1.288,
"step": 392
},
{
"epoch": 0.58,
"learning_rate": 7.747873235261157e-06,
"loss": 1.1935,
"step": 393
},
{
"epoch": 0.59,
"learning_rate": 7.700809200812596e-06,
"loss": 1.2566,
"step": 394
},
{
"epoch": 0.59,
"learning_rate": 7.6537988753805e-06,
"loss": 1.2961,
"step": 395
},
{
"epoch": 0.59,
"learning_rate": 7.606843357124426e-06,
"loss": 1.2606,
"step": 396
},
{
"epoch": 0.59,
"learning_rate": 7.559943742923626e-06,
"loss": 1.279,
"step": 397
},
{
"epoch": 0.59,
"learning_rate": 7.513101128351454e-06,
"loss": 1.2082,
"step": 398
},
{
"epoch": 0.59,
"learning_rate": 7.4663166076497376e-06,
"loss": 1.2393,
"step": 399
},
{
"epoch": 0.6,
"learning_rate": 7.419591273703245e-06,
"loss": 1.2876,
"step": 400
},
{
"epoch": 0.6,
"learning_rate": 7.372926218014131e-06,
"loss": 1.2556,
"step": 401
},
{
"epoch": 0.6,
"learning_rate": 7.326322530676471e-06,
"loss": 1.2951,
"step": 402
},
{
"epoch": 0.6,
"learning_rate": 7.279781300350758e-06,
"loss": 1.2807,
"step": 403
},
{
"epoch": 0.6,
"learning_rate": 7.23330361423851e-06,
"loss": 1.1951,
"step": 404
},
{
"epoch": 0.6,
"learning_rate": 7.186890558056836e-06,
"loss": 1.2544,
"step": 405
},
{
"epoch": 0.6,
"learning_rate": 7.140543216013109e-06,
"loss": 1.3186,
"step": 406
},
{
"epoch": 0.61,
"learning_rate": 7.094262670779611e-06,
"loss": 1.2609,
"step": 407
},
{
"epoch": 0.61,
"learning_rate": 7.048050003468252e-06,
"loss": 1.2355,
"step": 408
},
{
"epoch": 0.61,
"learning_rate": 7.001906293605329e-06,
"loss": 1.3441,
"step": 409
},
{
"epoch": 0.61,
"learning_rate": 6.9558326191062775e-06,
"loss": 1.2555,
"step": 410
},
{
"epoch": 0.61,
"learning_rate": 6.909830056250527e-06,
"loss": 1.2804,
"step": 411
},
{
"epoch": 0.61,
"learning_rate": 6.8638996796563275e-06,
"loss": 1.2558,
"step": 412
},
{
"epoch": 0.61,
"learning_rate": 6.81804256225567e-06,
"loss": 1.2442,
"step": 413
},
{
"epoch": 0.62,
"learning_rate": 6.772259775269203e-06,
"loss": 1.3093,
"step": 414
},
{
"epoch": 0.62,
"learning_rate": 6.7265523881812335e-06,
"loss": 1.2746,
"step": 415
},
{
"epoch": 0.62,
"learning_rate": 6.680921468714718e-06,
"loss": 1.2415,
"step": 416
},
{
"epoch": 0.62,
"learning_rate": 6.6353680828063306e-06,
"loss": 1.2565,
"step": 417
},
{
"epoch": 0.62,
"learning_rate": 6.589893294581579e-06,
"loss": 1.2718,
"step": 418
},
{
"epoch": 0.62,
"learning_rate": 6.5444981663299135e-06,
"loss": 1.2559,
"step": 419
},
{
"epoch": 0.63,
"learning_rate": 6.499183758479944e-06,
"loss": 1.2666,
"step": 420
},
{
"epoch": 0.63,
"learning_rate": 6.453951129574644e-06,
"loss": 1.2722,
"step": 421
},
{
"epoch": 0.63,
"learning_rate": 6.408801336246645e-06,
"loss": 1.2676,
"step": 422
},
{
"epoch": 0.63,
"learning_rate": 6.36373543319353e-06,
"loss": 1.3094,
"step": 423
},
{
"epoch": 0.63,
"learning_rate": 6.318754473153221e-06,
"loss": 1.2831,
"step": 424
},
{
"epoch": 0.63,
"learning_rate": 6.273859506879365e-06,
"loss": 1.2935,
"step": 425
},
{
"epoch": 0.63,
"learning_rate": 6.229051583116796e-06,
"loss": 1.2686,
"step": 426
},
{
"epoch": 0.64,
"learning_rate": 6.18433174857705e-06,
"loss": 1.3003,
"step": 427
},
{
"epoch": 0.64,
"learning_rate": 6.139701047913885e-06,
"loss": 1.2519,
"step": 428
},
{
"epoch": 0.64,
"learning_rate": 6.095160523698913e-06,
"loss": 1.2345,
"step": 429
},
{
"epoch": 0.64,
"learning_rate": 6.050711216397212e-06,
"loss": 1.2453,
"step": 430
},
{
"epoch": 0.64,
"learning_rate": 6.006354164343047e-06,
"loss": 1.2333,
"step": 431
},
{
"epoch": 0.64,
"learning_rate": 5.962090403715592e-06,
"loss": 1.297,
"step": 432
},
{
"epoch": 0.64,
"learning_rate": 5.9179209685147525e-06,
"loss": 1.2697,
"step": 433
},
{
"epoch": 0.65,
"learning_rate": 5.873846890536976e-06,
"loss": 1.1891,
"step": 434
},
{
"epoch": 0.65,
"learning_rate": 5.829869199351188e-06,
"loss": 1.2085,
"step": 435
},
{
"epoch": 0.65,
"learning_rate": 5.785988922274711e-06,
"loss": 1.2834,
"step": 436
},
{
"epoch": 0.65,
"learning_rate": 5.742207084349274e-06,
"loss": 1.2407,
"step": 437
},
{
"epoch": 0.65,
"learning_rate": 5.698524708317082e-06,
"loss": 1.2343,
"step": 438
},
{
"epoch": 0.65,
"learning_rate": 5.654942814596902e-06,
"loss": 1.2673,
"step": 439
},
{
"epoch": 0.65,
"learning_rate": 5.611462421260251e-06,
"loss": 1.2289,
"step": 440
},
{
"epoch": 0.66,
"learning_rate": 5.5680845440075885e-06,
"loss": 1.1601,
"step": 441
},
{
"epoch": 0.66,
"learning_rate": 5.5248101961446065e-06,
"loss": 1.1937,
"step": 442
},
{
"epoch": 0.66,
"learning_rate": 5.481640388558551e-06,
"loss": 1.287,
"step": 443
},
{
"epoch": 0.66,
"learning_rate": 5.43857612969462e-06,
"loss": 1.302,
"step": 444
},
{
"epoch": 0.66,
"learning_rate": 5.39561842553239e-06,
"loss": 1.2488,
"step": 445
},
{
"epoch": 0.66,
"learning_rate": 5.352768279562315e-06,
"loss": 1.2708,
"step": 446
},
{
"epoch": 0.67,
"learning_rate": 5.310026692762316e-06,
"loss": 1.2296,
"step": 447
},
{
"epoch": 0.67,
"learning_rate": 5.267394663574351e-06,
"loss": 1.2844,
"step": 448
},
{
"epoch": 0.67,
"learning_rate": 5.224873187881136e-06,
"loss": 1.2818,
"step": 449
},
{
"epoch": 0.67,
"learning_rate": 5.1824632589828465e-06,
"loss": 1.2913,
"step": 450
},
{
"epoch": 0.67,
"learning_rate": 5.14016586757394e-06,
"loss": 1.1986,
"step": 451
},
{
"epoch": 0.67,
"learning_rate": 5.097982001719994e-06,
"loss": 1.2389,
"step": 452
},
{
"epoch": 0.67,
"learning_rate": 5.0559126468346354e-06,
"loss": 1.2424,
"step": 453
},
{
"epoch": 0.68,
"learning_rate": 5.013958785656516e-06,
"loss": 1.2219,
"step": 454
},
{
"epoch": 0.68,
"learning_rate": 4.972121398226371e-06,
"loss": 1.3056,
"step": 455
},
{
"epoch": 0.68,
"learning_rate": 4.930401461864099e-06,
"loss": 1.2937,
"step": 456
},
{
"epoch": 0.68,
"learning_rate": 4.888799951145948e-06,
"loss": 1.2016,
"step": 457
},
{
"epoch": 0.68,
"learning_rate": 4.847317837881757e-06,
"loss": 1.2371,
"step": 458
},
{
"epoch": 0.68,
"learning_rate": 4.805956091092228e-06,
"loss": 1.2854,
"step": 459
},
{
"epoch": 0.68,
"learning_rate": 4.764715676986327e-06,
"loss": 1.2802,
"step": 460
},
{
"epoch": 0.69,
"learning_rate": 4.7235975589386715e-06,
"loss": 1.201,
"step": 461
},
{
"epoch": 0.69,
"learning_rate": 4.6826026974670665e-06,
"loss": 1.2367,
"step": 462
},
{
"epoch": 0.69,
"learning_rate": 4.641732050210032e-06,
"loss": 1.2973,
"step": 463
},
{
"epoch": 0.69,
"learning_rate": 4.600986571904461e-06,
"loss": 1.2414,
"step": 464
},
{
"epoch": 0.69,
"learning_rate": 4.560367214363295e-06,
"loss": 1.3178,
"step": 465
},
{
"epoch": 0.69,
"learning_rate": 4.519874926453303e-06,
"loss": 1.282,
"step": 466
},
{
"epoch": 0.7,
"learning_rate": 4.479510654072909e-06,
"loss": 1.2451,
"step": 467
},
{
"epoch": 0.7,
"learning_rate": 4.439275340130099e-06,
"loss": 1.253,
"step": 468
},
{
"epoch": 0.7,
"learning_rate": 4.399169924520403e-06,
"loss": 1.2678,
"step": 469
},
{
"epoch": 0.7,
"learning_rate": 4.359195344104916e-06,
"loss": 1.2431,
"step": 470
},
{
"epoch": 0.7,
"learning_rate": 4.319352532688444e-06,
"loss": 1.2509,
"step": 471
},
{
"epoch": 0.7,
"learning_rate": 4.279642420997655e-06,
"loss": 1.2233,
"step": 472
},
{
"epoch": 0.7,
"learning_rate": 4.240065936659374e-06,
"loss": 1.2887,
"step": 473
},
{
"epoch": 0.71,
"learning_rate": 4.200624004178883e-06,
"loss": 1.2884,
"step": 474
},
{
"epoch": 0.71,
"learning_rate": 4.161317544918345e-06,
"loss": 1.2143,
"step": 475
},
{
"epoch": 0.71,
"learning_rate": 4.12214747707527e-06,
"loss": 1.2523,
"step": 476
},
{
"epoch": 0.71,
"learning_rate": 4.083114715661069e-06,
"loss": 1.2383,
"step": 477
},
{
"epoch": 0.71,
"learning_rate": 4.044220172479675e-06,
"loss": 1.2751,
"step": 478
},
{
"epoch": 0.71,
"learning_rate": 4.0054647561062625e-06,
"loss": 1.242,
"step": 479
},
{
"epoch": 0.71,
"learning_rate": 3.9668493718659924e-06,
"loss": 1.2541,
"step": 480
},
{
"epoch": 0.72,
"learning_rate": 3.9283749218128885e-06,
"loss": 1.2416,
"step": 481
},
{
"epoch": 0.72,
"learning_rate": 3.890042304708758e-06,
"loss": 1.3218,
"step": 482
},
{
"epoch": 0.72,
"learning_rate": 3.8518524160021876e-06,
"loss": 1.2254,
"step": 483
},
{
"epoch": 0.72,
"learning_rate": 3.813806147807645e-06,
"loss": 1.2653,
"step": 484
},
{
"epoch": 0.72,
"learning_rate": 3.775904388884618e-06,
"loss": 1.2374,
"step": 485
},
{
"epoch": 0.72,
"learning_rate": 3.738148024616863e-06,
"loss": 1.2748,
"step": 486
},
{
"epoch": 0.72,
"learning_rate": 3.700537936991733e-06,
"loss": 1.2403,
"step": 487
},
{
"epoch": 0.73,
"learning_rate": 3.6630750045795472e-06,
"loss": 1.2698,
"step": 488
},
{
"epoch": 0.73,
"learning_rate": 3.625760102513103e-06,
"loss": 1.2395,
"step": 489
},
{
"epoch": 0.73,
"learning_rate": 3.5885941024672e-06,
"loss": 1.2069,
"step": 490
},
{
"epoch": 0.73,
"learning_rate": 3.5515778726382967e-06,
"loss": 1.1868,
"step": 491
},
{
"epoch": 0.73,
"learning_rate": 3.5147122777242203e-06,
"loss": 1.2782,
"step": 492
},
{
"epoch": 0.73,
"learning_rate": 3.477998178903982e-06,
"loss": 1.2558,
"step": 493
},
{
"epoch": 0.74,
"learning_rate": 3.441436433817641e-06,
"loss": 1.2606,
"step": 494
},
{
"epoch": 0.74,
"learning_rate": 3.405027896546277e-06,
"loss": 1.2529,
"step": 495
},
{
"epoch": 0.74,
"learning_rate": 3.3687734175920505e-06,
"loss": 1.2769,
"step": 496
},
{
"epoch": 0.74,
"learning_rate": 3.3326738438583116e-06,
"loss": 1.233,
"step": 497
},
{
"epoch": 0.74,
"learning_rate": 3.2967300186298456e-06,
"loss": 1.2171,
"step": 498
},
{
"epoch": 0.74,
"learning_rate": 3.2609427815531426e-06,
"loss": 1.2684,
"step": 499
},
{
"epoch": 0.74,
"learning_rate": 3.2253129686168105e-06,
"loss": 1.2783,
"step": 500
},
{
"epoch": 0.75,
"learning_rate": 3.1898414121320277e-06,
"loss": 1.2487,
"step": 501
},
{
"epoch": 0.75,
"learning_rate": 3.1545289407131128e-06,
"loss": 1.2257,
"step": 502
},
{
"epoch": 0.75,
"learning_rate": 3.11937637925816e-06,
"loss": 1.2122,
"step": 503
},
{
"epoch": 0.75,
"learning_rate": 3.0843845489297698e-06,
"loss": 1.239,
"step": 504
},
{
"epoch": 0.75,
"learning_rate": 3.0495542671358745e-06,
"loss": 1.1959,
"step": 505
},
{
"epoch": 0.75,
"learning_rate": 3.0148863475106315e-06,
"loss": 1.322,
"step": 506
},
{
"epoch": 0.75,
"learning_rate": 2.9803815998954334e-06,
"loss": 1.2692,
"step": 507
},
{
"epoch": 0.76,
"learning_rate": 2.9460408303199696e-06,
"loss": 1.2184,
"step": 508
},
{
"epoch": 0.76,
"learning_rate": 2.9118648409834205e-06,
"loss": 1.2474,
"step": 509
},
{
"epoch": 0.76,
"learning_rate": 2.8778544302356904e-06,
"loss": 1.2498,
"step": 510
},
{
"epoch": 0.76,
"learning_rate": 2.8440103925587904e-06,
"loss": 1.2443,
"step": 511
},
{
"epoch": 0.76,
"learning_rate": 2.810333518548246e-06,
"loss": 1.2884,
"step": 512
},
{
"epoch": 0.76,
"learning_rate": 2.7768245948946615e-06,
"loss": 1.1899,
"step": 513
},
{
"epoch": 0.77,
"learning_rate": 2.743484404365314e-06,
"loss": 1.245,
"step": 514
},
{
"epoch": 0.77,
"learning_rate": 2.7103137257858867e-06,
"loss": 1.1911,
"step": 515
},
{
"epoch": 0.77,
"learning_rate": 2.6773133340222677e-06,
"loss": 1.2697,
"step": 516
},
{
"epoch": 0.77,
"learning_rate": 2.6444839999624496e-06,
"loss": 1.3149,
"step": 517
},
{
"epoch": 0.77,
"learning_rate": 2.61182649049853e-06,
"loss": 1.2549,
"step": 518
},
{
"epoch": 0.77,
"learning_rate": 2.5793415685087797e-06,
"loss": 1.2726,
"step": 519
},
{
"epoch": 0.77,
"learning_rate": 2.5470299928398424e-06,
"loss": 1.1909,
"step": 520
},
{
"epoch": 0.78,
"learning_rate": 2.514892518288988e-06,
"loss": 1.1946,
"step": 521
},
{
"epoch": 0.78,
"learning_rate": 2.4829298955865022e-06,
"loss": 1.2669,
"step": 522
},
{
"epoch": 0.78,
"learning_rate": 2.451142871378124e-06,
"loss": 1.2515,
"step": 523
},
{
"epoch": 0.78,
"learning_rate": 2.4195321882076295e-06,
"loss": 1.2561,
"step": 524
},
{
"epoch": 0.78,
"learning_rate": 2.3880985844994674e-06,
"loss": 1.2367,
"step": 525
},
{
"epoch": 0.78,
"learning_rate": 2.3568427945415163e-06,
"loss": 1.2895,
"step": 526
},
{
"epoch": 0.78,
"learning_rate": 2.3257655484679376e-06,
"loss": 1.2103,
"step": 527
},
{
"epoch": 0.79,
"learning_rate": 2.2948675722421086e-06,
"loss": 1.2231,
"step": 528
},
{
"epoch": 0.79,
"learning_rate": 2.2641495876396713e-06,
"loss": 1.2337,
"step": 529
},
{
"epoch": 0.79,
"learning_rate": 2.2336123122316642e-06,
"loss": 1.2965,
"step": 530
},
{
"epoch": 0.79,
"learning_rate": 2.2032564593677773e-06,
"loss": 1.1466,
"step": 531
},
{
"epoch": 0.79,
"learning_rate": 2.1730827381596643e-06,
"loss": 1.2549,
"step": 532
},
{
"epoch": 0.79,
"learning_rate": 2.1430918534643996e-06,
"loss": 1.2029,
"step": 533
},
{
"epoch": 0.79,
"learning_rate": 2.1132845058679942e-06,
"loss": 1.2463,
"step": 534
},
{
"epoch": 0.8,
"learning_rate": 2.083661391669043e-06,
"loss": 1.1695,
"step": 535
},
{
"epoch": 0.8,
"learning_rate": 2.0542232028624585e-06,
"loss": 1.2532,
"step": 536
},
{
"epoch": 0.8,
"learning_rate": 2.024970627123295e-06,
"loss": 1.2172,
"step": 537
},
{
"epoch": 0.8,
"learning_rate": 1.9959043477907e-06,
"loss": 1.2227,
"step": 538
},
{
"epoch": 0.8,
"learning_rate": 1.967025043851939e-06,
"loss": 1.2273,
"step": 539
},
{
"epoch": 0.8,
"learning_rate": 1.9383333899265368e-06,
"loss": 1.1965,
"step": 540
},
{
"epoch": 0.81,
"learning_rate": 1.9098300562505266e-06,
"loss": 1.2331,
"step": 541
},
{
"epoch": 0.81,
"learning_rate": 1.8815157086607826e-06,
"loss": 1.2546,
"step": 542
},
{
"epoch": 0.81,
"learning_rate": 1.8533910085794714e-06,
"loss": 1.2212,
"step": 543
},
{
"epoch": 0.81,
"learning_rate": 1.8254566129985996e-06,
"loss": 1.264,
"step": 544
},
{
"epoch": 0.81,
"learning_rate": 1.7977131744646724e-06,
"loss": 1.1909,
"step": 545
},
{
"epoch": 0.81,
"learning_rate": 1.7701613410634367e-06,
"loss": 1.2418,
"step": 546
},
{
"epoch": 0.81,
"learning_rate": 1.7428017564047594e-06,
"loss": 1.2505,
"step": 547
},
{
"epoch": 0.82,
"learning_rate": 1.7156350596075743e-06,
"loss": 1.2874,
"step": 548
},
{
"epoch": 0.82,
"learning_rate": 1.6886618852849723e-06,
"loss": 1.2475,
"step": 549
},
{
"epoch": 0.82,
"learning_rate": 1.6618828635293538e-06,
"loss": 1.2224,
"step": 550
},
{
"epoch": 0.82,
"learning_rate": 1.6352986198977327e-06,
"loss": 1.1563,
"step": 551
},
{
"epoch": 0.82,
"learning_rate": 1.6089097753971061e-06,
"loss": 1.2175,
"step": 552
},
{
"epoch": 0.82,
"learning_rate": 1.5827169464699576e-06,
"loss": 1.1903,
"step": 553
},
{
"epoch": 0.82,
"learning_rate": 1.5567207449798517e-06,
"loss": 1.2403,
"step": 554
},
{
"epoch": 0.83,
"learning_rate": 1.5309217781971419e-06,
"loss": 1.205,
"step": 555
},
{
"epoch": 0.83,
"learning_rate": 1.5053206487847916e-06,
"loss": 1.2447,
"step": 556
},
{
"epoch": 0.83,
"learning_rate": 1.4799179547842823e-06,
"loss": 1.208,
"step": 557
},
{
"epoch": 0.83,
"learning_rate": 1.454714289601661e-06,
"loss": 1.2491,
"step": 558
},
{
"epoch": 0.83,
"learning_rate": 1.4297102419936559e-06,
"loss": 1.2412,
"step": 559
},
{
"epoch": 0.83,
"learning_rate": 1.4049063960539488e-06,
"loss": 1.2431,
"step": 560
},
{
"epoch": 0.83,
"learning_rate": 1.3803033311995072e-06,
"loss": 1.206,
"step": 561
},
{
"epoch": 0.84,
"learning_rate": 1.3559016221570663e-06,
"loss": 1.2426,
"step": 562
},
{
"epoch": 0.84,
"learning_rate": 1.3317018389496927e-06,
"loss": 1.2978,
"step": 563
},
{
"epoch": 0.84,
"learning_rate": 1.3077045468834714e-06,
"loss": 1.2363,
"step": 564
},
{
"epoch": 0.84,
"learning_rate": 1.2839103065343084e-06,
"loss": 1.2608,
"step": 565
},
{
"epoch": 0.84,
"learning_rate": 1.2603196737348211e-06,
"loss": 1.1973,
"step": 566
},
{
"epoch": 0.84,
"learning_rate": 1.2369331995613664e-06,
"loss": 1.2315,
"step": 567
},
{
"epoch": 0.85,
"learning_rate": 1.213751430321156e-06,
"loss": 1.2313,
"step": 568
},
{
"epoch": 0.85,
"learning_rate": 1.1907749075395147e-06,
"loss": 1.2884,
"step": 569
},
{
"epoch": 0.85,
"learning_rate": 1.168004167947202e-06,
"loss": 1.1736,
"step": 570
},
{
"epoch": 0.85,
"learning_rate": 1.1454397434679022e-06,
"loss": 1.2719,
"step": 571
},
{
"epoch": 0.85,
"learning_rate": 1.123082161205775e-06,
"loss": 1.2442,
"step": 572
},
{
"epoch": 0.85,
"learning_rate": 1.1009319434331623e-06,
"loss": 1.1979,
"step": 573
},
{
"epoch": 0.85,
"learning_rate": 1.0789896075783734e-06,
"loss": 1.2035,
"step": 574
},
{
"epoch": 0.86,
"learning_rate": 1.0572556662136036e-06,
"loss": 1.1973,
"step": 575
},
{
"epoch": 0.86,
"learning_rate": 1.0357306270429623e-06,
"loss": 1.2564,
"step": 576
},
{
"epoch": 0.86,
"learning_rate": 1.014414992890611e-06,
"loss": 1.2806,
"step": 577
},
{
"epoch": 0.86,
"learning_rate": 9.93309261689015e-07,
"loss": 1.2205,
"step": 578
},
{
"epoch": 0.86,
"learning_rate": 9.724139264673116e-07,
"loss": 1.2583,
"step": 579
},
{
"epoch": 0.86,
"learning_rate": 9.517294753398066e-07,
"loss": 1.1722,
"step": 580
},
{
"epoch": 0.86,
"learning_rate": 9.312563914945461e-07,
"loss": 1.2193,
"step": 581
},
{
"epoch": 0.87,
"learning_rate": 9.10995153182056e-07,
"loss": 1.212,
"step": 582
},
{
"epoch": 0.87,
"learning_rate": 8.909462337041508e-07,
"loss": 1.2481,
"step": 583
},
{
"epoch": 0.87,
"learning_rate": 8.711101014028855e-07,
"loss": 1.1747,
"step": 584
},
{
"epoch": 0.87,
"learning_rate": 8.514872196496182e-07,
"loss": 1.2399,
"step": 585
},
{
"epoch": 0.87,
"learning_rate": 8.320780468341761e-07,
"loss": 1.1957,
"step": 586
},
{
"epoch": 0.87,
"learning_rate": 8.128830363541574e-07,
"loss": 1.1571,
"step": 587
},
{
"epoch": 0.88,
"learning_rate": 7.939026366043323e-07,
"loss": 1.2122,
"step": 588
},
{
"epoch": 0.88,
"learning_rate": 7.75137290966177e-07,
"loss": 1.1939,
"step": 589
},
{
"epoch": 0.88,
"learning_rate": 7.565874377975046e-07,
"loss": 1.2378,
"step": 590
},
{
"epoch": 0.88,
"learning_rate": 7.382535104222366e-07,
"loss": 1.2314,
"step": 591
},
{
"epoch": 0.88,
"learning_rate": 7.201359371202698e-07,
"loss": 1.3062,
"step": 592
},
{
"epoch": 0.88,
"learning_rate": 7.022351411174866e-07,
"loss": 1.2114,
"step": 593
},
{
"epoch": 0.88,
"learning_rate": 6.845515405758518e-07,
"loss": 1.2199,
"step": 594
},
{
"epoch": 0.89,
"learning_rate": 6.670855485836525e-07,
"loss": 1.1837,
"step": 595
},
{
"epoch": 0.89,
"learning_rate": 6.498375731458529e-07,
"loss": 1.2768,
"step": 596
},
{
"epoch": 0.89,
"learning_rate": 6.32808017174551e-07,
"loss": 1.1881,
"step": 597
},
{
"epoch": 0.89,
"learning_rate": 6.159972784795798e-07,
"loss": 1.1876,
"step": 598
},
{
"epoch": 0.89,
"learning_rate": 5.994057497592032e-07,
"loss": 1.2634,
"step": 599
},
{
"epoch": 0.89,
"learning_rate": 5.830338185909545e-07,
"loss": 1.2416,
"step": 600
},
{
"epoch": 0.89,
"learning_rate": 5.668818674225684e-07,
"loss": 1.2112,
"step": 601
},
{
"epoch": 0.9,
"learning_rate": 5.509502735630601e-07,
"loss": 1.2512,
"step": 602
},
{
"epoch": 0.9,
"learning_rate": 5.352394091739022e-07,
"loss": 1.205,
"step": 603
},
{
"epoch": 0.9,
"learning_rate": 5.197496412603365e-07,
"loss": 1.2531,
"step": 604
},
{
"epoch": 0.9,
"learning_rate": 5.044813316627994e-07,
"loss": 1.1902,
"step": 605
},
{
"epoch": 0.9,
"learning_rate": 4.894348370484648e-07,
"loss": 1.2178,
"step": 606
},
{
"epoch": 0.9,
"learning_rate": 4.746105089029229e-07,
"loss": 1.1809,
"step": 607
},
{
"epoch": 0.9,
"learning_rate": 4.6000869352195607e-07,
"loss": 1.2531,
"step": 608
},
{
"epoch": 0.91,
"learning_rate": 4.4562973200346413e-07,
"loss": 1.2166,
"step": 609
},
{
"epoch": 0.91,
"learning_rate": 4.314739602394791e-07,
"loss": 1.2276,
"step": 610
},
{
"epoch": 0.91,
"learning_rate": 4.1754170890833777e-07,
"loss": 1.2475,
"step": 611
},
{
"epoch": 0.91,
"learning_rate": 4.038333034669406e-07,
"loss": 1.2198,
"step": 612
},
{
"epoch": 0.91,
"learning_rate": 3.903490641431573e-07,
"loss": 1.2193,
"step": 613
},
{
"epoch": 0.91,
"learning_rate": 3.770893059283465e-07,
"loss": 1.2427,
"step": 614
},
{
"epoch": 0.92,
"learning_rate": 3.6405433856999684e-07,
"loss": 1.2744,
"step": 615
},
{
"epoch": 0.92,
"learning_rate": 3.5124446656448654e-07,
"loss": 1.2709,
"step": 616
},
{
"epoch": 0.92,
"learning_rate": 3.3865998914997645e-07,
"loss": 1.2479,
"step": 617
},
{
"epoch": 0.92,
"learning_rate": 3.2630120029942034e-07,
"loss": 1.2375,
"step": 618
},
{
"epoch": 0.92,
"learning_rate": 3.1416838871368925e-07,
"loss": 1.2396,
"step": 619
},
{
"epoch": 0.92,
"learning_rate": 3.0226183781483897e-07,
"loss": 1.1764,
"step": 620
},
{
"epoch": 0.92,
"learning_rate": 2.905818257394799e-07,
"loss": 1.2325,
"step": 621
},
{
"epoch": 0.93,
"learning_rate": 2.791286253322856e-07,
"loss": 1.2208,
"step": 622
},
{
"epoch": 0.93,
"learning_rate": 2.679025041396155e-07,
"loss": 1.2567,
"step": 623
},
{
"epoch": 0.93,
"learning_rate": 2.569037244032657e-07,
"loss": 1.1698,
"step": 624
},
{
"epoch": 0.93,
"learning_rate": 2.461325430543482e-07,
"loss": 1.1624,
"step": 625
},
{
"epoch": 0.93,
"learning_rate": 2.355892117072789e-07,
"loss": 1.2314,
"step": 626
},
{
"epoch": 0.93,
"learning_rate": 2.2527397665391026e-07,
"loss": 1.2763,
"step": 627
},
{
"epoch": 0.93,
"learning_rate": 2.1518707885777147e-07,
"loss": 1.1968,
"step": 628
},
{
"epoch": 0.94,
"learning_rate": 2.0532875394844053e-07,
"loss": 1.2502,
"step": 629
},
{
"epoch": 0.94,
"learning_rate": 1.9569923221604224e-07,
"loss": 1.2219,
"step": 630
},
{
"epoch": 0.94,
"learning_rate": 1.8629873860586567e-07,
"loss": 1.2245,
"step": 631
},
{
"epoch": 0.94,
"learning_rate": 1.7712749271311392e-07,
"loss": 1.214,
"step": 632
},
{
"epoch": 0.94,
"learning_rate": 1.681857087777672e-07,
"loss": 1.1827,
"step": 633
},
{
"epoch": 0.94,
"learning_rate": 1.5947359567958677e-07,
"loss": 1.2491,
"step": 634
},
{
"epoch": 0.95,
"learning_rate": 1.5099135693322776e-07,
"loss": 1.2284,
"step": 635
},
{
"epoch": 0.95,
"learning_rate": 1.4273919068349184e-07,
"loss": 1.1829,
"step": 636
},
{
"epoch": 0.95,
"learning_rate": 1.3471728970068986e-07,
"loss": 1.2225,
"step": 637
},
{
"epoch": 0.95,
"learning_rate": 1.2692584137615205e-07,
"loss": 1.2541,
"step": 638
},
{
"epoch": 0.95,
"learning_rate": 1.1936502771783488e-07,
"loss": 1.2206,
"step": 639
},
{
"epoch": 0.95,
"learning_rate": 1.1203502534608113e-07,
"loss": 1.2489,
"step": 640
},
{
"epoch": 0.95,
"learning_rate": 1.0493600548948879e-07,
"loss": 1.2834,
"step": 641
},
{
"epoch": 0.96,
"learning_rate": 9.806813398091419e-08,
"loss": 1.191,
"step": 642
},
{
"epoch": 0.96,
"learning_rate": 9.143157125359514e-08,
"loss": 1.1584,
"step": 643
},
{
"epoch": 0.96,
"learning_rate": 8.502647233740169e-08,
"loss": 1.1973,
"step": 644
},
{
"epoch": 0.96,
"learning_rate": 7.885298685522235e-08,
"loss": 1.2084,
"step": 645
},
{
"epoch": 0.96,
"learning_rate": 7.291125901946027e-08,
"loss": 1.2765,
"step": 646
},
{
"epoch": 0.96,
"learning_rate": 6.720142762867032e-08,
"loss": 1.2335,
"step": 647
},
{
"epoch": 0.96,
"learning_rate": 6.172362606431281e-08,
"loss": 1.2119,
"step": 648
},
{
"epoch": 0.97,
"learning_rate": 5.647798228764156e-08,
"loss": 1.2392,
"step": 649
},
{
"epoch": 0.97,
"learning_rate": 5.146461883671072e-08,
"loss": 1.2353,
"step": 650
},
{
"epoch": 0.97,
"learning_rate": 4.6683652823513725e-08,
"loss": 1.2426,
"step": 651
},
{
"epoch": 0.97,
"learning_rate": 4.2135195931249925e-08,
"loss": 1.2257,
"step": 652
},
{
"epoch": 0.97,
"learning_rate": 3.7819354411713364e-08,
"loss": 1.2124,
"step": 653
},
{
"epoch": 0.97,
"learning_rate": 3.373622908280916e-08,
"loss": 1.157,
"step": 654
},
{
"epoch": 0.97,
"learning_rate": 2.988591532620322e-08,
"loss": 1.2755,
"step": 655
},
{
"epoch": 0.98,
"learning_rate": 2.6268503085089547e-08,
"loss": 1.1833,
"step": 656
},
{
"epoch": 0.98,
"learning_rate": 2.2884076862089712e-08,
"loss": 1.2691,
"step": 657
},
{
"epoch": 0.98,
"learning_rate": 1.973271571728441e-08,
"loss": 1.2374,
"step": 658
},
{
"epoch": 0.98,
"learning_rate": 1.6814493266357202e-08,
"loss": 1.276,
"step": 659
},
{
"epoch": 0.98,
"learning_rate": 1.4129477678884728e-08,
"loss": 1.2091,
"step": 660
},
{
"epoch": 0.98,
"learning_rate": 1.1677731676733584e-08,
"loss": 1.1781,
"step": 661
},
{
"epoch": 0.99,
"learning_rate": 9.459312532608122e-09,
"loss": 1.2607,
"step": 662
},
{
"epoch": 0.99,
"learning_rate": 7.474272068698219e-09,
"loss": 1.2763,
"step": 663
},
{
"epoch": 0.99,
"learning_rate": 5.722656655482439e-09,
"loss": 1.1663,
"step": 664
},
{
"epoch": 0.99,
"learning_rate": 4.204507210633368e-09,
"loss": 1.2102,
"step": 665
},
{
"epoch": 0.99,
"learning_rate": 2.9198591980705847e-09,
"loss": 1.2141,
"step": 666
},
{
"epoch": 0.99,
"learning_rate": 1.8687426271246646e-09,
"loss": 1.2232,
"step": 667
},
{
"epoch": 0.99,
"learning_rate": 1.0511820518432915e-09,
"loss": 1.2098,
"step": 668
},
{
"epoch": 1.0,
"learning_rate": 4.671965704128312e-10,
"loss": 1.2296,
"step": 669
},
{
"epoch": 1.0,
"learning_rate": 1.167998247131319e-10,
"loss": 1.2456,
"step": 670
},
{
"epoch": 1.0,
"learning_rate": 0.0,
"loss": 1.1816,
"step": 671
},
{
"epoch": 1.0,
"step": 671,
"total_flos": 1.6993129412260004e+18,
"train_loss": 1.318009591315791,
"train_runtime": 13830.2591,
"train_samples_per_second": 6.218,
"train_steps_per_second": 0.049
}
],
"logging_steps": 1.0,
"max_steps": 671,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 7975,
"total_flos": 1.6993129412260004e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}