QinyuZhao1116's picture
Upload Model Checkpoint
fe0fd30
raw
history blame
211 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.0,
"global_step": 1746,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 3.0303030303030305e-06,
"loss": 1.946,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 6.060606060606061e-06,
"loss": 1.908,
"step": 2
},
{
"epoch": 0.01,
"learning_rate": 9.090909090909091e-06,
"loss": 2.1083,
"step": 3
},
{
"epoch": 0.01,
"learning_rate": 1.2121212121212122e-05,
"loss": 2.3218,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 1.5151515151515153e-05,
"loss": 1.8338,
"step": 5
},
{
"epoch": 0.01,
"learning_rate": 1.8181818181818182e-05,
"loss": 2.0202,
"step": 6
},
{
"epoch": 0.02,
"learning_rate": 2.1212121212121215e-05,
"loss": 2.1332,
"step": 7
},
{
"epoch": 0.02,
"learning_rate": 2.4242424242424244e-05,
"loss": 1.8593,
"step": 8
},
{
"epoch": 0.02,
"learning_rate": 2.7272727272727273e-05,
"loss": 1.5359,
"step": 9
},
{
"epoch": 0.02,
"learning_rate": 3.0303030303030306e-05,
"loss": 1.327,
"step": 10
},
{
"epoch": 0.03,
"learning_rate": 3.3333333333333335e-05,
"loss": 1.7252,
"step": 11
},
{
"epoch": 0.03,
"learning_rate": 3.6363636363636364e-05,
"loss": 1.4351,
"step": 12
},
{
"epoch": 0.03,
"learning_rate": 3.939393939393939e-05,
"loss": 1.2774,
"step": 13
},
{
"epoch": 0.03,
"learning_rate": 4.242424242424243e-05,
"loss": 1.5145,
"step": 14
},
{
"epoch": 0.03,
"learning_rate": 4.545454545454546e-05,
"loss": 1.1529,
"step": 15
},
{
"epoch": 0.04,
"learning_rate": 4.848484848484849e-05,
"loss": 1.0047,
"step": 16
},
{
"epoch": 0.04,
"learning_rate": 5.151515151515152e-05,
"loss": 1.3872,
"step": 17
},
{
"epoch": 0.04,
"learning_rate": 5.4545454545454546e-05,
"loss": 1.1229,
"step": 18
},
{
"epoch": 0.04,
"learning_rate": 5.757575757575758e-05,
"loss": 1.3386,
"step": 19
},
{
"epoch": 0.05,
"learning_rate": 6.060606060606061e-05,
"loss": 1.2493,
"step": 20
},
{
"epoch": 0.05,
"learning_rate": 6.363636363636364e-05,
"loss": 1.1427,
"step": 21
},
{
"epoch": 0.05,
"learning_rate": 6.666666666666667e-05,
"loss": 1.0895,
"step": 22
},
{
"epoch": 0.05,
"learning_rate": 6.96969696969697e-05,
"loss": 1.1989,
"step": 23
},
{
"epoch": 0.05,
"learning_rate": 7.272727272727273e-05,
"loss": 1.0438,
"step": 24
},
{
"epoch": 0.06,
"learning_rate": 7.575757575757576e-05,
"loss": 1.176,
"step": 25
},
{
"epoch": 0.06,
"learning_rate": 7.878787878787879e-05,
"loss": 1.1372,
"step": 26
},
{
"epoch": 0.06,
"learning_rate": 8.181818181818183e-05,
"loss": 1.2983,
"step": 27
},
{
"epoch": 0.06,
"learning_rate": 8.484848484848486e-05,
"loss": 0.9371,
"step": 28
},
{
"epoch": 0.07,
"learning_rate": 8.787878787878789e-05,
"loss": 1.2299,
"step": 29
},
{
"epoch": 0.07,
"learning_rate": 9.090909090909092e-05,
"loss": 0.9441,
"step": 30
},
{
"epoch": 0.07,
"learning_rate": 9.393939393939395e-05,
"loss": 1.0011,
"step": 31
},
{
"epoch": 0.07,
"learning_rate": 9.696969696969698e-05,
"loss": 1.1704,
"step": 32
},
{
"epoch": 0.08,
"learning_rate": 0.0001,
"loss": 1.1193,
"step": 33
},
{
"epoch": 0.08,
"learning_rate": 0.00010303030303030303,
"loss": 1.1559,
"step": 34
},
{
"epoch": 0.08,
"learning_rate": 0.00010606060606060606,
"loss": 0.8677,
"step": 35
},
{
"epoch": 0.08,
"learning_rate": 0.00010909090909090909,
"loss": 1.0865,
"step": 36
},
{
"epoch": 0.08,
"learning_rate": 0.00011212121212121212,
"loss": 1.0922,
"step": 37
},
{
"epoch": 0.09,
"learning_rate": 0.00011515151515151516,
"loss": 0.9434,
"step": 38
},
{
"epoch": 0.09,
"learning_rate": 0.0001181818181818182,
"loss": 0.9144,
"step": 39
},
{
"epoch": 0.09,
"learning_rate": 0.00012121212121212122,
"loss": 0.9546,
"step": 40
},
{
"epoch": 0.09,
"learning_rate": 0.00012424242424242425,
"loss": 1.0654,
"step": 41
},
{
"epoch": 0.1,
"learning_rate": 0.00012727272727272728,
"loss": 0.8077,
"step": 42
},
{
"epoch": 0.1,
"learning_rate": 0.0001303030303030303,
"loss": 1.0758,
"step": 43
},
{
"epoch": 0.1,
"learning_rate": 0.00013333333333333334,
"loss": 1.1512,
"step": 44
},
{
"epoch": 0.1,
"learning_rate": 0.00013636363636363637,
"loss": 0.84,
"step": 45
},
{
"epoch": 0.11,
"learning_rate": 0.0001393939393939394,
"loss": 1.0567,
"step": 46
},
{
"epoch": 0.11,
"learning_rate": 0.00014242424242424243,
"loss": 1.0165,
"step": 47
},
{
"epoch": 0.11,
"learning_rate": 0.00014545454545454546,
"loss": 0.8678,
"step": 48
},
{
"epoch": 0.11,
"learning_rate": 0.00014848484848484849,
"loss": 1.055,
"step": 49
},
{
"epoch": 0.11,
"learning_rate": 0.00015151515151515152,
"loss": 1.0669,
"step": 50
},
{
"epoch": 0.12,
"learning_rate": 0.00015454545454545454,
"loss": 0.9915,
"step": 51
},
{
"epoch": 0.12,
"learning_rate": 0.00015757575757575757,
"loss": 0.993,
"step": 52
},
{
"epoch": 0.12,
"learning_rate": 0.0001606060606060606,
"loss": 1.1085,
"step": 53
},
{
"epoch": 0.12,
"learning_rate": 0.00016363636363636366,
"loss": 0.9391,
"step": 54
},
{
"epoch": 0.13,
"learning_rate": 0.0001666666666666667,
"loss": 0.975,
"step": 55
},
{
"epoch": 0.13,
"learning_rate": 0.00016969696969696972,
"loss": 1.0697,
"step": 56
},
{
"epoch": 0.13,
"learning_rate": 0.00017272727272727275,
"loss": 0.9462,
"step": 57
},
{
"epoch": 0.13,
"learning_rate": 0.00017575757575757578,
"loss": 1.1209,
"step": 58
},
{
"epoch": 0.14,
"learning_rate": 0.0001787878787878788,
"loss": 1.0648,
"step": 59
},
{
"epoch": 0.14,
"learning_rate": 0.00018181818181818183,
"loss": 0.9964,
"step": 60
},
{
"epoch": 0.14,
"learning_rate": 0.00018484848484848484,
"loss": 0.8451,
"step": 61
},
{
"epoch": 0.14,
"learning_rate": 0.0001878787878787879,
"loss": 0.8437,
"step": 62
},
{
"epoch": 0.14,
"learning_rate": 0.00019090909090909092,
"loss": 1.1271,
"step": 63
},
{
"epoch": 0.15,
"learning_rate": 0.00019393939393939395,
"loss": 1.161,
"step": 64
},
{
"epoch": 0.15,
"learning_rate": 0.00019696969696969698,
"loss": 1.0032,
"step": 65
},
{
"epoch": 0.15,
"learning_rate": 0.0002,
"loss": 1.1258,
"step": 66
},
{
"epoch": 0.15,
"learning_rate": 0.00019999988957695886,
"loss": 0.9543,
"step": 67
},
{
"epoch": 0.16,
"learning_rate": 0.00019999955830807923,
"loss": 1.0274,
"step": 68
},
{
"epoch": 0.16,
"learning_rate": 0.00019999900619409279,
"loss": 0.9334,
"step": 69
},
{
"epoch": 0.16,
"learning_rate": 0.0001999982332362188,
"loss": 1.0398,
"step": 70
},
{
"epoch": 0.16,
"learning_rate": 0.00019999723943616433,
"loss": 0.9049,
"step": 71
},
{
"epoch": 0.16,
"learning_rate": 0.00019999602479612417,
"loss": 0.7452,
"step": 72
},
{
"epoch": 0.17,
"learning_rate": 0.00019999458931878073,
"loss": 0.8762,
"step": 73
},
{
"epoch": 0.17,
"learning_rate": 0.00019999293300730427,
"loss": 1.0941,
"step": 74
},
{
"epoch": 0.17,
"learning_rate": 0.00019999105586535268,
"loss": 0.7713,
"step": 75
},
{
"epoch": 0.17,
"learning_rate": 0.00019998895789707154,
"loss": 0.9233,
"step": 76
},
{
"epoch": 0.18,
"learning_rate": 0.00019998663910709416,
"loss": 0.8634,
"step": 77
},
{
"epoch": 0.18,
"learning_rate": 0.00019998409950054146,
"loss": 0.9697,
"step": 78
},
{
"epoch": 0.18,
"learning_rate": 0.00019998133908302209,
"loss": 1.0816,
"step": 79
},
{
"epoch": 0.18,
"learning_rate": 0.0001999783578606323,
"loss": 0.9659,
"step": 80
},
{
"epoch": 0.19,
"learning_rate": 0.00019997515583995603,
"loss": 0.9644,
"step": 81
},
{
"epoch": 0.19,
"learning_rate": 0.00019997173302806478,
"loss": 0.8561,
"step": 82
},
{
"epoch": 0.19,
"learning_rate": 0.00019996808943251773,
"loss": 1.0016,
"step": 83
},
{
"epoch": 0.19,
"learning_rate": 0.0001999642250613616,
"loss": 0.8951,
"step": 84
},
{
"epoch": 0.19,
"learning_rate": 0.00019996013992313073,
"loss": 1.0157,
"step": 85
},
{
"epoch": 0.2,
"learning_rate": 0.00019995583402684694,
"loss": 0.9414,
"step": 86
},
{
"epoch": 0.2,
"learning_rate": 0.00019995130738201966,
"loss": 0.8097,
"step": 87
},
{
"epoch": 0.2,
"learning_rate": 0.00019994655999864582,
"loss": 0.8606,
"step": 88
},
{
"epoch": 0.2,
"learning_rate": 0.0001999415918872098,
"loss": 1.0427,
"step": 89
},
{
"epoch": 0.21,
"learning_rate": 0.00019993640305868352,
"loss": 0.9578,
"step": 90
},
{
"epoch": 0.21,
"learning_rate": 0.00019993099352452623,
"loss": 1.1097,
"step": 91
},
{
"epoch": 0.21,
"learning_rate": 0.00019992536329668478,
"loss": 0.8119,
"step": 92
},
{
"epoch": 0.21,
"learning_rate": 0.00019991951238759325,
"loss": 0.9915,
"step": 93
},
{
"epoch": 0.22,
"learning_rate": 0.0001999134408101731,
"loss": 0.838,
"step": 94
},
{
"epoch": 0.22,
"learning_rate": 0.00019990714857783326,
"loss": 0.8935,
"step": 95
},
{
"epoch": 0.22,
"learning_rate": 0.00019990063570446984,
"loss": 0.7914,
"step": 96
},
{
"epoch": 0.22,
"learning_rate": 0.00019989390220446622,
"loss": 0.8724,
"step": 97
},
{
"epoch": 0.22,
"learning_rate": 0.00019988694809269314,
"loss": 1.0374,
"step": 98
},
{
"epoch": 0.23,
"learning_rate": 0.00019987977338450845,
"loss": 0.9028,
"step": 99
},
{
"epoch": 0.23,
"learning_rate": 0.00019987237809575723,
"loss": 0.9986,
"step": 100
},
{
"epoch": 0.23,
"learning_rate": 0.00019986476224277165,
"loss": 1.113,
"step": 101
},
{
"epoch": 0.23,
"learning_rate": 0.00019985692584237108,
"loss": 0.8395,
"step": 102
},
{
"epoch": 0.24,
"learning_rate": 0.00019984886891186184,
"loss": 1.0134,
"step": 103
},
{
"epoch": 0.24,
"learning_rate": 0.0001998405914690374,
"loss": 0.8845,
"step": 104
},
{
"epoch": 0.24,
"learning_rate": 0.00019983209353217812,
"loss": 0.7507,
"step": 105
},
{
"epoch": 0.24,
"learning_rate": 0.00019982337512005138,
"loss": 0.9073,
"step": 106
},
{
"epoch": 0.25,
"learning_rate": 0.00019981443625191148,
"loss": 0.9973,
"step": 107
},
{
"epoch": 0.25,
"learning_rate": 0.00019980527694749952,
"loss": 1.0733,
"step": 108
},
{
"epoch": 0.25,
"learning_rate": 0.00019979589722704346,
"loss": 0.9148,
"step": 109
},
{
"epoch": 0.25,
"learning_rate": 0.00019978629711125812,
"loss": 0.8385,
"step": 110
},
{
"epoch": 0.25,
"learning_rate": 0.00019977647662134488,
"loss": 0.75,
"step": 111
},
{
"epoch": 0.26,
"learning_rate": 0.00019976643577899195,
"loss": 0.9002,
"step": 112
},
{
"epoch": 0.26,
"learning_rate": 0.00019975617460637416,
"loss": 0.8754,
"step": 113
},
{
"epoch": 0.26,
"learning_rate": 0.0001997456931261529,
"loss": 0.8886,
"step": 114
},
{
"epoch": 0.26,
"learning_rate": 0.00019973499136147606,
"loss": 1.0058,
"step": 115
},
{
"epoch": 0.27,
"learning_rate": 0.00019972406933597812,
"loss": 0.9276,
"step": 116
},
{
"epoch": 0.27,
"learning_rate": 0.00019971292707377991,
"loss": 0.9922,
"step": 117
},
{
"epoch": 0.27,
"learning_rate": 0.00019970156459948873,
"loss": 0.9507,
"step": 118
},
{
"epoch": 0.27,
"learning_rate": 0.0001996899819381981,
"loss": 0.9619,
"step": 119
},
{
"epoch": 0.27,
"learning_rate": 0.00019967817911548794,
"loss": 0.8163,
"step": 120
},
{
"epoch": 0.28,
"learning_rate": 0.00019966615615742424,
"loss": 1.0647,
"step": 121
},
{
"epoch": 0.28,
"learning_rate": 0.0001996539130905593,
"loss": 0.9348,
"step": 122
},
{
"epoch": 0.28,
"learning_rate": 0.00019964144994193142,
"loss": 1.0523,
"step": 123
},
{
"epoch": 0.28,
"learning_rate": 0.000199628766739065,
"loss": 0.9063,
"step": 124
},
{
"epoch": 0.29,
"learning_rate": 0.00019961586350997033,
"loss": 1.0227,
"step": 125
},
{
"epoch": 0.29,
"learning_rate": 0.0001996027402831438,
"loss": 1.006,
"step": 126
},
{
"epoch": 0.29,
"learning_rate": 0.00019958939708756746,
"loss": 0.9082,
"step": 127
},
{
"epoch": 0.29,
"learning_rate": 0.00019957583395270923,
"loss": 0.8756,
"step": 128
},
{
"epoch": 0.3,
"learning_rate": 0.0001995620509085228,
"loss": 0.8311,
"step": 129
},
{
"epoch": 0.3,
"learning_rate": 0.00019954804798544745,
"loss": 1.0332,
"step": 130
},
{
"epoch": 0.3,
"learning_rate": 0.00019953382521440815,
"loss": 0.9427,
"step": 131
},
{
"epoch": 0.3,
"learning_rate": 0.00019951938262681527,
"loss": 0.838,
"step": 132
},
{
"epoch": 0.3,
"learning_rate": 0.0001995047202545647,
"loss": 0.8509,
"step": 133
},
{
"epoch": 0.31,
"learning_rate": 0.00019948983813003774,
"loss": 0.8944,
"step": 134
},
{
"epoch": 0.31,
"learning_rate": 0.00019947473628610099,
"loss": 0.9569,
"step": 135
},
{
"epoch": 0.31,
"learning_rate": 0.00019945941475610623,
"loss": 0.7805,
"step": 136
},
{
"epoch": 0.31,
"learning_rate": 0.00019944387357389052,
"loss": 0.9337,
"step": 137
},
{
"epoch": 0.32,
"learning_rate": 0.0001994281127737759,
"loss": 0.8712,
"step": 138
},
{
"epoch": 0.32,
"learning_rate": 0.0001994121323905695,
"loss": 0.9264,
"step": 139
},
{
"epoch": 0.32,
"learning_rate": 0.0001993959324595634,
"loss": 0.9323,
"step": 140
},
{
"epoch": 0.32,
"learning_rate": 0.00019937951301653444,
"loss": 0.8331,
"step": 141
},
{
"epoch": 0.33,
"learning_rate": 0.0001993628740977444,
"loss": 0.902,
"step": 142
},
{
"epoch": 0.33,
"learning_rate": 0.0001993460157399396,
"loss": 0.8676,
"step": 143
},
{
"epoch": 0.33,
"learning_rate": 0.00019932893798035116,
"loss": 0.8525,
"step": 144
},
{
"epoch": 0.33,
"learning_rate": 0.00019931164085669456,
"loss": 0.8571,
"step": 145
},
{
"epoch": 0.33,
"learning_rate": 0.00019929412440716985,
"loss": 1.0006,
"step": 146
},
{
"epoch": 0.34,
"learning_rate": 0.00019927638867046142,
"loss": 0.9849,
"step": 147
},
{
"epoch": 0.34,
"learning_rate": 0.00019925843368573794,
"loss": 0.9064,
"step": 148
},
{
"epoch": 0.34,
"learning_rate": 0.0001992402594926523,
"loss": 0.9716,
"step": 149
},
{
"epoch": 0.34,
"learning_rate": 0.0001992218661313415,
"loss": 0.7553,
"step": 150
},
{
"epoch": 0.35,
"learning_rate": 0.00019920325364242654,
"loss": 0.7921,
"step": 151
},
{
"epoch": 0.35,
"learning_rate": 0.00019918442206701245,
"loss": 0.7994,
"step": 152
},
{
"epoch": 0.35,
"learning_rate": 0.0001991653714466879,
"loss": 0.8296,
"step": 153
},
{
"epoch": 0.35,
"learning_rate": 0.00019914610182352548,
"loss": 0.8116,
"step": 154
},
{
"epoch": 0.36,
"learning_rate": 0.00019912661324008148,
"loss": 0.9844,
"step": 155
},
{
"epoch": 0.36,
"learning_rate": 0.00019910690573939557,
"loss": 0.865,
"step": 156
},
{
"epoch": 0.36,
"learning_rate": 0.00019908697936499103,
"loss": 0.959,
"step": 157
},
{
"epoch": 0.36,
"learning_rate": 0.00019906683416087448,
"loss": 0.7727,
"step": 158
},
{
"epoch": 0.36,
"learning_rate": 0.00019904647017153582,
"loss": 0.707,
"step": 159
},
{
"epoch": 0.37,
"learning_rate": 0.00019902588744194813,
"loss": 0.8597,
"step": 160
},
{
"epoch": 0.37,
"learning_rate": 0.00019900508601756756,
"loss": 0.9146,
"step": 161
},
{
"epoch": 0.37,
"learning_rate": 0.0001989840659443332,
"loss": 0.9571,
"step": 162
},
{
"epoch": 0.37,
"learning_rate": 0.0001989628272686671,
"loss": 0.8537,
"step": 163
},
{
"epoch": 0.38,
"learning_rate": 0.00019894137003747403,
"loss": 0.828,
"step": 164
},
{
"epoch": 0.38,
"learning_rate": 0.00019891969429814145,
"loss": 0.8055,
"step": 165
},
{
"epoch": 0.38,
"learning_rate": 0.0001988978000985394,
"loss": 0.8432,
"step": 166
},
{
"epoch": 0.38,
"learning_rate": 0.0001988756874870203,
"loss": 0.8101,
"step": 167
},
{
"epoch": 0.38,
"learning_rate": 0.00019885335651241903,
"loss": 0.9072,
"step": 168
},
{
"epoch": 0.39,
"learning_rate": 0.0001988308072240527,
"loss": 0.7862,
"step": 169
},
{
"epoch": 0.39,
"learning_rate": 0.00019880803967172047,
"loss": 0.8303,
"step": 170
},
{
"epoch": 0.39,
"learning_rate": 0.00019878505390570362,
"loss": 0.9489,
"step": 171
},
{
"epoch": 0.39,
"learning_rate": 0.0001987618499767653,
"loss": 1.0125,
"step": 172
},
{
"epoch": 0.4,
"learning_rate": 0.0001987384279361505,
"loss": 0.809,
"step": 173
},
{
"epoch": 0.4,
"learning_rate": 0.00019871478783558587,
"loss": 0.9488,
"step": 174
},
{
"epoch": 0.4,
"learning_rate": 0.0001986909297272796,
"loss": 0.9664,
"step": 175
},
{
"epoch": 0.4,
"learning_rate": 0.0001986668536639215,
"loss": 0.9657,
"step": 176
},
{
"epoch": 0.41,
"learning_rate": 0.0001986425596986825,
"loss": 0.8123,
"step": 177
},
{
"epoch": 0.41,
"learning_rate": 0.00019861804788521493,
"loss": 0.9482,
"step": 178
},
{
"epoch": 0.41,
"learning_rate": 0.00019859331827765212,
"loss": 0.879,
"step": 179
},
{
"epoch": 0.41,
"learning_rate": 0.00019856837093060848,
"loss": 0.896,
"step": 180
},
{
"epoch": 0.41,
"learning_rate": 0.00019854320589917927,
"loss": 1.0729,
"step": 181
},
{
"epoch": 0.42,
"learning_rate": 0.00019851782323894042,
"loss": 0.9844,
"step": 182
},
{
"epoch": 0.42,
"learning_rate": 0.0001984922230059486,
"loss": 0.9131,
"step": 183
},
{
"epoch": 0.42,
"learning_rate": 0.00019846640525674082,
"loss": 0.9417,
"step": 184
},
{
"epoch": 0.42,
"learning_rate": 0.00019844037004833473,
"loss": 0.9633,
"step": 185
},
{
"epoch": 0.43,
"learning_rate": 0.0001984141174382279,
"loss": 0.968,
"step": 186
},
{
"epoch": 0.43,
"learning_rate": 0.00019838764748439827,
"loss": 0.8447,
"step": 187
},
{
"epoch": 0.43,
"learning_rate": 0.00019836096024530373,
"loss": 0.8638,
"step": 188
},
{
"epoch": 0.43,
"learning_rate": 0.00019833405577988195,
"loss": 0.9346,
"step": 189
},
{
"epoch": 0.44,
"learning_rate": 0.0001983069341475504,
"loss": 0.8969,
"step": 190
},
{
"epoch": 0.44,
"learning_rate": 0.00019827959540820613,
"loss": 0.8499,
"step": 191
},
{
"epoch": 0.44,
"learning_rate": 0.00019825203962222572,
"loss": 0.8041,
"step": 192
},
{
"epoch": 0.44,
"learning_rate": 0.00019822426685046497,
"loss": 0.9216,
"step": 193
},
{
"epoch": 0.44,
"learning_rate": 0.00019819627715425903,
"loss": 0.906,
"step": 194
},
{
"epoch": 0.45,
"learning_rate": 0.000198168070595422,
"loss": 0.8969,
"step": 195
},
{
"epoch": 0.45,
"learning_rate": 0.000198139647236247,
"loss": 0.7949,
"step": 196
},
{
"epoch": 0.45,
"learning_rate": 0.00019811100713950587,
"loss": 0.8996,
"step": 197
},
{
"epoch": 0.45,
"learning_rate": 0.00019808215036844917,
"loss": 0.9118,
"step": 198
},
{
"epoch": 0.46,
"learning_rate": 0.0001980530769868059,
"loss": 0.7355,
"step": 199
},
{
"epoch": 0.46,
"learning_rate": 0.00019802378705878354,
"loss": 0.8344,
"step": 200
},
{
"epoch": 0.46,
"learning_rate": 0.00019799428064906774,
"loss": 0.9639,
"step": 201
},
{
"epoch": 0.46,
"learning_rate": 0.0001979645578228222,
"loss": 0.852,
"step": 202
},
{
"epoch": 0.47,
"learning_rate": 0.0001979346186456887,
"loss": 0.8493,
"step": 203
},
{
"epoch": 0.47,
"learning_rate": 0.00019790446318378665,
"loss": 0.851,
"step": 204
},
{
"epoch": 0.47,
"learning_rate": 0.00019787409150371328,
"loss": 0.7161,
"step": 205
},
{
"epoch": 0.47,
"learning_rate": 0.00019784350367254322,
"loss": 0.9846,
"step": 206
},
{
"epoch": 0.47,
"learning_rate": 0.0001978126997578285,
"loss": 0.7883,
"step": 207
},
{
"epoch": 0.48,
"learning_rate": 0.00019778167982759833,
"loss": 0.8691,
"step": 208
},
{
"epoch": 0.48,
"learning_rate": 0.00019775044395035907,
"loss": 0.928,
"step": 209
},
{
"epoch": 0.48,
"learning_rate": 0.0001977189921950939,
"loss": 0.8244,
"step": 210
},
{
"epoch": 0.48,
"learning_rate": 0.0001976873246312628,
"loss": 1.0413,
"step": 211
},
{
"epoch": 0.49,
"learning_rate": 0.0001976554413288023,
"loss": 0.8261,
"step": 212
},
{
"epoch": 0.49,
"learning_rate": 0.0001976233423581255,
"loss": 0.823,
"step": 213
},
{
"epoch": 0.49,
"learning_rate": 0.00019759102779012166,
"loss": 0.9386,
"step": 214
},
{
"epoch": 0.49,
"learning_rate": 0.00019755849769615628,
"loss": 0.8156,
"step": 215
},
{
"epoch": 0.49,
"learning_rate": 0.00019752575214807076,
"loss": 0.8556,
"step": 216
},
{
"epoch": 0.5,
"learning_rate": 0.00019749279121818235,
"loss": 0.7769,
"step": 217
},
{
"epoch": 0.5,
"learning_rate": 0.00019745961497928406,
"loss": 1.0772,
"step": 218
},
{
"epoch": 0.5,
"learning_rate": 0.00019742622350464418,
"loss": 0.8147,
"step": 219
},
{
"epoch": 0.5,
"learning_rate": 0.0001973926168680066,
"loss": 0.9529,
"step": 220
},
{
"epoch": 0.51,
"learning_rate": 0.00019735879514359018,
"loss": 0.8688,
"step": 221
},
{
"epoch": 0.51,
"learning_rate": 0.00019732475840608888,
"loss": 0.9647,
"step": 222
},
{
"epoch": 0.51,
"learning_rate": 0.00019729050673067156,
"loss": 0.837,
"step": 223
},
{
"epoch": 0.51,
"learning_rate": 0.00019725604019298163,
"loss": 0.9211,
"step": 224
},
{
"epoch": 0.52,
"learning_rate": 0.00019722135886913715,
"loss": 0.9434,
"step": 225
},
{
"epoch": 0.52,
"learning_rate": 0.0001971864628357304,
"loss": 0.6506,
"step": 226
},
{
"epoch": 0.52,
"learning_rate": 0.00019715135216982798,
"loss": 0.8052,
"step": 227
},
{
"epoch": 0.52,
"learning_rate": 0.00019711602694897037,
"loss": 0.7852,
"step": 228
},
{
"epoch": 0.52,
"learning_rate": 0.00019708048725117192,
"loss": 0.9283,
"step": 229
},
{
"epoch": 0.53,
"learning_rate": 0.0001970447331549207,
"loss": 0.9081,
"step": 230
},
{
"epoch": 0.53,
"learning_rate": 0.00019700876473917824,
"loss": 0.9036,
"step": 231
},
{
"epoch": 0.53,
"learning_rate": 0.00019697258208337934,
"loss": 0.716,
"step": 232
},
{
"epoch": 0.53,
"learning_rate": 0.00019693618526743197,
"loss": 0.8192,
"step": 233
},
{
"epoch": 0.54,
"learning_rate": 0.0001968995743717171,
"loss": 0.9773,
"step": 234
},
{
"epoch": 0.54,
"learning_rate": 0.00019686274947708848,
"loss": 0.8698,
"step": 235
},
{
"epoch": 0.54,
"learning_rate": 0.0001968257106648724,
"loss": 0.9062,
"step": 236
},
{
"epoch": 0.54,
"learning_rate": 0.00019678845801686764,
"loss": 0.8984,
"step": 237
},
{
"epoch": 0.55,
"learning_rate": 0.00019675099161534521,
"loss": 0.8087,
"step": 238
},
{
"epoch": 0.55,
"learning_rate": 0.00019671331154304822,
"loss": 0.8272,
"step": 239
},
{
"epoch": 0.55,
"learning_rate": 0.00019667541788319162,
"loss": 0.784,
"step": 240
},
{
"epoch": 0.55,
"learning_rate": 0.00019663731071946206,
"loss": 0.8777,
"step": 241
},
{
"epoch": 0.55,
"learning_rate": 0.00019659899013601772,
"loss": 0.8534,
"step": 242
},
{
"epoch": 0.56,
"learning_rate": 0.00019656045621748808,
"loss": 0.9645,
"step": 243
},
{
"epoch": 0.56,
"learning_rate": 0.00019652170904897387,
"loss": 0.9692,
"step": 244
},
{
"epoch": 0.56,
"learning_rate": 0.00019648274871604662,
"loss": 0.838,
"step": 245
},
{
"epoch": 0.56,
"learning_rate": 0.00019644357530474872,
"loss": 0.7445,
"step": 246
},
{
"epoch": 0.57,
"learning_rate": 0.0001964041889015931,
"loss": 0.9065,
"step": 247
},
{
"epoch": 0.57,
"learning_rate": 0.00019636458959356316,
"loss": 0.7806,
"step": 248
},
{
"epoch": 0.57,
"learning_rate": 0.00019632477746811232,
"loss": 0.7971,
"step": 249
},
{
"epoch": 0.57,
"learning_rate": 0.00019628475261316417,
"loss": 0.8409,
"step": 250
},
{
"epoch": 0.58,
"learning_rate": 0.00019624451511711198,
"loss": 0.7432,
"step": 251
},
{
"epoch": 0.58,
"learning_rate": 0.00019620406506881875,
"loss": 0.9096,
"step": 252
},
{
"epoch": 0.58,
"learning_rate": 0.00019616340255761676,
"loss": 0.8004,
"step": 253
},
{
"epoch": 0.58,
"learning_rate": 0.00019612252767330763,
"loss": 0.7978,
"step": 254
},
{
"epoch": 0.58,
"learning_rate": 0.0001960814405061619,
"loss": 0.9535,
"step": 255
},
{
"epoch": 0.59,
"learning_rate": 0.000196040141146919,
"loss": 0.9945,
"step": 256
},
{
"epoch": 0.59,
"learning_rate": 0.0001959986296867869,
"loss": 0.9703,
"step": 257
},
{
"epoch": 0.59,
"learning_rate": 0.00019595690621744208,
"loss": 0.9639,
"step": 258
},
{
"epoch": 0.59,
"learning_rate": 0.00019591497083102914,
"loss": 0.9312,
"step": 259
},
{
"epoch": 0.6,
"learning_rate": 0.00019587282362016083,
"loss": 0.7709,
"step": 260
},
{
"epoch": 0.6,
"learning_rate": 0.0001958304646779175,
"loss": 0.8547,
"step": 261
},
{
"epoch": 0.6,
"learning_rate": 0.00019578789409784727,
"loss": 0.8081,
"step": 262
},
{
"epoch": 0.6,
"learning_rate": 0.00019574511197396563,
"loss": 0.8476,
"step": 263
},
{
"epoch": 0.6,
"learning_rate": 0.00019570211840075517,
"loss": 0.9658,
"step": 264
},
{
"epoch": 0.61,
"learning_rate": 0.00019565891347316552,
"loss": 0.7778,
"step": 265
},
{
"epoch": 0.61,
"learning_rate": 0.0001956154972866131,
"loss": 0.9926,
"step": 266
},
{
"epoch": 0.61,
"learning_rate": 0.0001955718699369808,
"loss": 0.957,
"step": 267
},
{
"epoch": 0.61,
"learning_rate": 0.000195528031520618,
"loss": 0.9396,
"step": 268
},
{
"epoch": 0.62,
"learning_rate": 0.00019548398213434007,
"loss": 0.9049,
"step": 269
},
{
"epoch": 0.62,
"learning_rate": 0.00019543972187542833,
"loss": 0.9683,
"step": 270
},
{
"epoch": 0.62,
"learning_rate": 0.00019539525084162992,
"loss": 0.8555,
"step": 271
},
{
"epoch": 0.62,
"learning_rate": 0.00019535056913115725,
"loss": 0.8489,
"step": 272
},
{
"epoch": 0.63,
"learning_rate": 0.0001953056768426882,
"loss": 0.8728,
"step": 273
},
{
"epoch": 0.63,
"learning_rate": 0.00019526057407536564,
"loss": 0.9443,
"step": 274
},
{
"epoch": 0.63,
"learning_rate": 0.00019521526092879725,
"loss": 0.8161,
"step": 275
},
{
"epoch": 0.63,
"learning_rate": 0.00019516973750305532,
"loss": 0.8936,
"step": 276
},
{
"epoch": 0.63,
"learning_rate": 0.00019512400389867657,
"loss": 0.8315,
"step": 277
},
{
"epoch": 0.64,
"learning_rate": 0.00019507806021666188,
"loss": 0.9298,
"step": 278
},
{
"epoch": 0.64,
"learning_rate": 0.00019503190655847604,
"loss": 0.8235,
"step": 279
},
{
"epoch": 0.64,
"learning_rate": 0.00019498554302604766,
"loss": 0.9245,
"step": 280
},
{
"epoch": 0.64,
"learning_rate": 0.0001949389697217687,
"loss": 0.8302,
"step": 281
},
{
"epoch": 0.65,
"learning_rate": 0.00019489218674849455,
"loss": 0.8488,
"step": 282
},
{
"epoch": 0.65,
"learning_rate": 0.00019484519420954354,
"loss": 0.8177,
"step": 283
},
{
"epoch": 0.65,
"learning_rate": 0.00019479799220869682,
"loss": 1.0039,
"step": 284
},
{
"epoch": 0.65,
"learning_rate": 0.00019475058085019825,
"loss": 0.7685,
"step": 285
},
{
"epoch": 0.66,
"learning_rate": 0.00019470296023875387,
"loss": 0.9174,
"step": 286
},
{
"epoch": 0.66,
"learning_rate": 0.000194655130479532,
"loss": 1.0997,
"step": 287
},
{
"epoch": 0.66,
"learning_rate": 0.00019460709167816274,
"loss": 0.9759,
"step": 288
},
{
"epoch": 0.66,
"learning_rate": 0.0001945588439407379,
"loss": 0.9397,
"step": 289
},
{
"epoch": 0.66,
"learning_rate": 0.00019451038737381077,
"loss": 1.0367,
"step": 290
},
{
"epoch": 0.67,
"learning_rate": 0.00019446172208439574,
"loss": 0.8298,
"step": 291
},
{
"epoch": 0.67,
"learning_rate": 0.0001944128481799682,
"loss": 0.9094,
"step": 292
},
{
"epoch": 0.67,
"learning_rate": 0.00019436376576846423,
"loss": 1.1234,
"step": 293
},
{
"epoch": 0.67,
"learning_rate": 0.00019431447495828045,
"loss": 0.9103,
"step": 294
},
{
"epoch": 0.68,
"learning_rate": 0.0001942649758582737,
"loss": 0.7841,
"step": 295
},
{
"epoch": 0.68,
"learning_rate": 0.00019421526857776072,
"loss": 0.8817,
"step": 296
},
{
"epoch": 0.68,
"learning_rate": 0.00019416535322651818,
"loss": 1.0682,
"step": 297
},
{
"epoch": 0.68,
"learning_rate": 0.00019411522991478214,
"loss": 0.9201,
"step": 298
},
{
"epoch": 0.68,
"learning_rate": 0.000194064898753248,
"loss": 4.1834,
"step": 299
},
{
"epoch": 0.69,
"learning_rate": 0.00019401435985307012,
"loss": 1.0391,
"step": 300
},
{
"epoch": 0.69,
"learning_rate": 0.00019396361332586166,
"loss": 2.5015,
"step": 301
},
{
"epoch": 0.69,
"learning_rate": 0.0001939126592836944,
"loss": 0.7927,
"step": 302
},
{
"epoch": 0.69,
"learning_rate": 0.0001938614978390983,
"loss": 2.2345,
"step": 303
},
{
"epoch": 0.7,
"learning_rate": 0.00019381012910506146,
"loss": 0.9311,
"step": 304
},
{
"epoch": 0.7,
"learning_rate": 0.00019375855319502962,
"loss": 0.9713,
"step": 305
},
{
"epoch": 0.7,
"learning_rate": 0.00019370677022290624,
"loss": 0.8967,
"step": 306
},
{
"epoch": 0.7,
"learning_rate": 0.00019365478030305196,
"loss": 3.095,
"step": 307
},
{
"epoch": 0.71,
"learning_rate": 0.0001936025835502845,
"loss": 1.1008,
"step": 308
},
{
"epoch": 0.71,
"learning_rate": 0.0001935501800798783,
"loss": 1.5409,
"step": 309
},
{
"epoch": 0.71,
"learning_rate": 0.00019349757000756444,
"loss": 1.02,
"step": 310
},
{
"epoch": 0.71,
"learning_rate": 0.00019344475344953012,
"loss": 1.0101,
"step": 311
},
{
"epoch": 0.71,
"learning_rate": 0.0001933917305224187,
"loss": 0.7686,
"step": 312
},
{
"epoch": 0.72,
"learning_rate": 0.0001933385013433292,
"loss": 1.1061,
"step": 313
},
{
"epoch": 0.72,
"learning_rate": 0.0001932850660298162,
"loss": 0.8083,
"step": 314
},
{
"epoch": 0.72,
"learning_rate": 0.0001932314246998895,
"loss": 1.1942,
"step": 315
},
{
"epoch": 0.72,
"learning_rate": 0.00019317757747201384,
"loss": 0.8551,
"step": 316
},
{
"epoch": 0.73,
"learning_rate": 0.00019312352446510878,
"loss": 0.9049,
"step": 317
},
{
"epoch": 0.73,
"learning_rate": 0.00019306926579854821,
"loss": 0.7072,
"step": 318
},
{
"epoch": 0.73,
"learning_rate": 0.00019301480159216028,
"loss": 0.8552,
"step": 319
},
{
"epoch": 0.73,
"learning_rate": 0.00019296013196622706,
"loss": 0.8414,
"step": 320
},
{
"epoch": 0.74,
"learning_rate": 0.0001929052570414843,
"loss": 0.9198,
"step": 321
},
{
"epoch": 0.74,
"learning_rate": 0.00019285017693912107,
"loss": 2.1953,
"step": 322
},
{
"epoch": 0.74,
"learning_rate": 0.00019279489178077969,
"loss": 0.851,
"step": 323
},
{
"epoch": 0.74,
"learning_rate": 0.00019273940168855518,
"loss": 1.0239,
"step": 324
},
{
"epoch": 0.74,
"learning_rate": 0.00019268370678499533,
"loss": 1.5125,
"step": 325
},
{
"epoch": 0.75,
"learning_rate": 0.00019262780719310008,
"loss": 0.9171,
"step": 326
},
{
"epoch": 0.75,
"learning_rate": 0.00019257170303632148,
"loss": 0.9794,
"step": 327
},
{
"epoch": 0.75,
"learning_rate": 0.00019251539443856344,
"loss": 0.9023,
"step": 328
},
{
"epoch": 0.75,
"learning_rate": 0.00019245888152418124,
"loss": 1.058,
"step": 329
},
{
"epoch": 0.76,
"learning_rate": 0.00019240216441798142,
"loss": 0.9411,
"step": 330
},
{
"epoch": 0.76,
"learning_rate": 0.0001923452432452215,
"loss": 1.197,
"step": 331
},
{
"epoch": 0.76,
"learning_rate": 0.0001922881181316097,
"loss": 0.9253,
"step": 332
},
{
"epoch": 0.76,
"learning_rate": 0.0001922307892033046,
"loss": 1.156,
"step": 333
},
{
"epoch": 0.77,
"learning_rate": 0.00019217325658691482,
"loss": 0.9424,
"step": 334
},
{
"epoch": 0.77,
"learning_rate": 0.00019211552040949891,
"loss": 1.1147,
"step": 335
},
{
"epoch": 0.77,
"learning_rate": 0.00019205758079856498,
"loss": 0.8528,
"step": 336
},
{
"epoch": 0.77,
"learning_rate": 0.0001919994378820704,
"loss": 0.8105,
"step": 337
},
{
"epoch": 0.77,
"learning_rate": 0.00019194109178842153,
"loss": 0.9279,
"step": 338
},
{
"epoch": 0.78,
"learning_rate": 0.00019188254264647337,
"loss": 0.9231,
"step": 339
},
{
"epoch": 0.78,
"learning_rate": 0.00019182379058552948,
"loss": 1.0425,
"step": 340
},
{
"epoch": 0.78,
"learning_rate": 0.00019176483573534142,
"loss": 0.8794,
"step": 341
},
{
"epoch": 0.78,
"learning_rate": 0.00019170567822610873,
"loss": 0.9873,
"step": 342
},
{
"epoch": 0.79,
"learning_rate": 0.0001916463181884784,
"loss": 0.8146,
"step": 343
},
{
"epoch": 0.79,
"learning_rate": 0.00019158675575354478,
"loss": 1.027,
"step": 344
},
{
"epoch": 0.79,
"learning_rate": 0.00019152699105284913,
"loss": 0.8093,
"step": 345
},
{
"epoch": 0.79,
"learning_rate": 0.0001914670242183795,
"loss": 0.951,
"step": 346
},
{
"epoch": 0.79,
"learning_rate": 0.00019140685538257028,
"loss": 0.9268,
"step": 347
},
{
"epoch": 0.8,
"learning_rate": 0.00019134648467830198,
"loss": 1.0205,
"step": 348
},
{
"epoch": 0.8,
"learning_rate": 0.00019128591223890092,
"loss": 0.9043,
"step": 349
},
{
"epoch": 0.8,
"learning_rate": 0.00019122513819813902,
"loss": 0.7387,
"step": 350
},
{
"epoch": 0.8,
"learning_rate": 0.0001911641626902333,
"loss": 0.9422,
"step": 351
},
{
"epoch": 0.81,
"learning_rate": 0.00019110298584984578,
"loss": 0.9015,
"step": 352
},
{
"epoch": 0.81,
"learning_rate": 0.0001910416078120832,
"loss": 0.7522,
"step": 353
},
{
"epoch": 0.81,
"learning_rate": 0.00019098002871249646,
"loss": 0.9722,
"step": 354
},
{
"epoch": 0.81,
"learning_rate": 0.0001909182486870806,
"loss": 0.8358,
"step": 355
},
{
"epoch": 0.82,
"learning_rate": 0.00019085626787227443,
"loss": 0.9859,
"step": 356
},
{
"epoch": 0.82,
"learning_rate": 0.00019079408640496013,
"loss": 0.7796,
"step": 357
},
{
"epoch": 0.82,
"learning_rate": 0.00019073170442246302,
"loss": 0.8617,
"step": 358
},
{
"epoch": 0.82,
"learning_rate": 0.0001906691220625513,
"loss": 0.7727,
"step": 359
},
{
"epoch": 0.82,
"learning_rate": 0.0001906063394634356,
"loss": 0.8786,
"step": 360
},
{
"epoch": 0.83,
"learning_rate": 0.0001905433567637689,
"loss": 0.9117,
"step": 361
},
{
"epoch": 0.83,
"learning_rate": 0.000190480174102646,
"loss": 0.9182,
"step": 362
},
{
"epoch": 0.83,
"learning_rate": 0.0001904167916196033,
"loss": 0.9706,
"step": 363
},
{
"epoch": 0.83,
"learning_rate": 0.0001903532094546186,
"loss": 0.8036,
"step": 364
},
{
"epoch": 0.84,
"learning_rate": 0.0001902894277481105,
"loss": 0.902,
"step": 365
},
{
"epoch": 0.84,
"learning_rate": 0.00019022544664093854,
"loss": 0.9231,
"step": 366
},
{
"epoch": 0.84,
"learning_rate": 0.00019016126627440237,
"loss": 0.9751,
"step": 367
},
{
"epoch": 0.84,
"learning_rate": 0.0001900968867902419,
"loss": 0.8373,
"step": 368
},
{
"epoch": 0.85,
"learning_rate": 0.0001900323083306367,
"loss": 0.8695,
"step": 369
},
{
"epoch": 0.85,
"learning_rate": 0.0001899675310382057,
"loss": 0.8654,
"step": 370
},
{
"epoch": 0.85,
"learning_rate": 0.00018990255505600706,
"loss": 0.98,
"step": 371
},
{
"epoch": 0.85,
"learning_rate": 0.00018983738052753767,
"loss": 0.7454,
"step": 372
},
{
"epoch": 0.85,
"learning_rate": 0.00018977200759673295,
"loss": 0.829,
"step": 373
},
{
"epoch": 0.86,
"learning_rate": 0.00018970643640796642,
"loss": 0.8262,
"step": 374
},
{
"epoch": 0.86,
"learning_rate": 0.0001896406671060495,
"loss": 1.0659,
"step": 375
},
{
"epoch": 0.86,
"learning_rate": 0.00018957469983623112,
"loss": 0.8551,
"step": 376
},
{
"epoch": 0.86,
"learning_rate": 0.00018950853474419742,
"loss": 0.7991,
"step": 377
},
{
"epoch": 0.87,
"learning_rate": 0.0001894421719760714,
"loss": 0.8662,
"step": 378
},
{
"epoch": 0.87,
"learning_rate": 0.00018937561167841263,
"loss": 0.8817,
"step": 379
},
{
"epoch": 0.87,
"learning_rate": 0.00018930885399821693,
"loss": 1.0894,
"step": 380
},
{
"epoch": 0.87,
"learning_rate": 0.000189241899082916,
"loss": 0.8225,
"step": 381
},
{
"epoch": 0.88,
"learning_rate": 0.00018917474708037718,
"loss": 0.9065,
"step": 382
},
{
"epoch": 0.88,
"learning_rate": 0.00018910739813890302,
"loss": 0.8779,
"step": 383
},
{
"epoch": 0.88,
"learning_rate": 0.00018903985240723104,
"loss": 0.7909,
"step": 384
},
{
"epoch": 0.88,
"learning_rate": 0.00018897211003453328,
"loss": 0.7649,
"step": 385
},
{
"epoch": 0.88,
"learning_rate": 0.00018890417117041619,
"loss": 0.9788,
"step": 386
},
{
"epoch": 0.89,
"learning_rate": 0.00018883603596492004,
"loss": 0.938,
"step": 387
},
{
"epoch": 0.89,
"learning_rate": 0.00018876770456851877,
"loss": 0.9032,
"step": 388
},
{
"epoch": 0.89,
"learning_rate": 0.00018869917713211964,
"loss": 0.9059,
"step": 389
},
{
"epoch": 0.89,
"learning_rate": 0.00018863045380706274,
"loss": 0.8896,
"step": 390
},
{
"epoch": 0.9,
"learning_rate": 0.0001885615347451209,
"loss": 0.7614,
"step": 391
},
{
"epoch": 0.9,
"learning_rate": 0.0001884924200984991,
"loss": 0.978,
"step": 392
},
{
"epoch": 0.9,
"learning_rate": 0.0001884231100198344,
"loss": 0.9406,
"step": 393
},
{
"epoch": 0.9,
"learning_rate": 0.00018835360466219533,
"loss": 0.7555,
"step": 394
},
{
"epoch": 0.9,
"learning_rate": 0.0001882839041790818,
"loss": 0.9049,
"step": 395
},
{
"epoch": 0.91,
"learning_rate": 0.00018821400872442458,
"loss": 0.7041,
"step": 396
},
{
"epoch": 0.91,
"learning_rate": 0.00018814391845258505,
"loss": 0.8995,
"step": 397
},
{
"epoch": 0.91,
"learning_rate": 0.0001880736335183548,
"loss": 0.7461,
"step": 398
},
{
"epoch": 0.91,
"learning_rate": 0.00018800315407695539,
"loss": 0.9954,
"step": 399
},
{
"epoch": 0.92,
"learning_rate": 0.00018793248028403788,
"loss": 0.9035,
"step": 400
},
{
"epoch": 0.92,
"learning_rate": 0.0001878616122956826,
"loss": 0.9083,
"step": 401
},
{
"epoch": 0.92,
"learning_rate": 0.00018779055026839868,
"loss": 0.7286,
"step": 402
},
{
"epoch": 0.92,
"learning_rate": 0.0001877192943591239,
"loss": 0.8001,
"step": 403
},
{
"epoch": 0.93,
"learning_rate": 0.00018764784472522403,
"loss": 0.8795,
"step": 404
},
{
"epoch": 0.93,
"learning_rate": 0.0001875762015244929,
"loss": 0.8912,
"step": 405
},
{
"epoch": 0.93,
"learning_rate": 0.00018750436491515163,
"loss": 0.8848,
"step": 406
},
{
"epoch": 0.93,
"learning_rate": 0.00018743233505584862,
"loss": 0.8512,
"step": 407
},
{
"epoch": 0.93,
"learning_rate": 0.00018736011210565898,
"loss": 0.8537,
"step": 408
},
{
"epoch": 0.94,
"learning_rate": 0.00018728769622408423,
"loss": 0.8777,
"step": 409
},
{
"epoch": 0.94,
"learning_rate": 0.00018721508757105202,
"loss": 0.7849,
"step": 410
},
{
"epoch": 0.94,
"learning_rate": 0.00018714228630691576,
"loss": 0.9669,
"step": 411
},
{
"epoch": 0.94,
"learning_rate": 0.0001870692925924541,
"loss": 0.9299,
"step": 412
},
{
"epoch": 0.95,
"learning_rate": 0.00018699610658887088,
"loss": 1.0188,
"step": 413
},
{
"epoch": 0.95,
"learning_rate": 0.00018692272845779448,
"loss": 0.8388,
"step": 414
},
{
"epoch": 0.95,
"learning_rate": 0.00018684915836127765,
"loss": 0.7904,
"step": 415
},
{
"epoch": 0.95,
"learning_rate": 0.00018677539646179707,
"loss": 0.9689,
"step": 416
},
{
"epoch": 0.96,
"learning_rate": 0.00018670144292225297,
"loss": 0.7339,
"step": 417
},
{
"epoch": 0.96,
"learning_rate": 0.00018662729790596888,
"loss": 0.7894,
"step": 418
},
{
"epoch": 0.96,
"learning_rate": 0.00018655296157669117,
"loss": 0.7163,
"step": 419
},
{
"epoch": 0.96,
"learning_rate": 0.00018647843409858869,
"loss": 0.8642,
"step": 420
},
{
"epoch": 0.96,
"learning_rate": 0.00018640371563625246,
"loss": 0.9281,
"step": 421
},
{
"epoch": 0.97,
"learning_rate": 0.00018632880635469526,
"loss": 0.834,
"step": 422
},
{
"epoch": 0.97,
"learning_rate": 0.00018625370641935129,
"loss": 0.7316,
"step": 423
},
{
"epoch": 0.97,
"learning_rate": 0.00018617841599607586,
"loss": 0.8504,
"step": 424
},
{
"epoch": 0.97,
"learning_rate": 0.00018610293525114492,
"loss": 0.8731,
"step": 425
},
{
"epoch": 0.98,
"learning_rate": 0.00018602726435125474,
"loss": 0.8803,
"step": 426
},
{
"epoch": 0.98,
"learning_rate": 0.0001859514034635215,
"loss": 0.8417,
"step": 427
},
{
"epoch": 0.98,
"learning_rate": 0.000185875352755481,
"loss": 0.8947,
"step": 428
},
{
"epoch": 0.98,
"learning_rate": 0.00018579911239508827,
"loss": 0.8368,
"step": 429
},
{
"epoch": 0.99,
"learning_rate": 0.00018572268255071718,
"loss": 0.8231,
"step": 430
},
{
"epoch": 0.99,
"learning_rate": 0.00018564606339116,
"loss": 0.8576,
"step": 431
},
{
"epoch": 0.99,
"learning_rate": 0.0001855692550856272,
"loss": 0.8753,
"step": 432
},
{
"epoch": 0.99,
"learning_rate": 0.00018549225780374685,
"loss": 0.7778,
"step": 433
},
{
"epoch": 0.99,
"learning_rate": 0.00018541507171556445,
"loss": 0.7516,
"step": 434
},
{
"epoch": 1.0,
"learning_rate": 0.0001853376969915425,
"loss": 0.7466,
"step": 435
},
{
"epoch": 1.0,
"learning_rate": 0.00018526013380255999,
"loss": 0.917,
"step": 436
},
{
"epoch": 1.0,
"learning_rate": 0.00018518238231991218,
"loss": 0.9042,
"step": 437
},
{
"epoch": 1.0,
"learning_rate": 0.00018510444271531022,
"loss": 0.8587,
"step": 438
},
{
"epoch": 1.01,
"learning_rate": 0.00018502631516088066,
"loss": 0.9001,
"step": 439
},
{
"epoch": 1.01,
"learning_rate": 0.0001849479998291651,
"loss": 0.7977,
"step": 440
},
{
"epoch": 1.01,
"learning_rate": 0.00018486949689311993,
"loss": 0.8711,
"step": 441
},
{
"epoch": 1.01,
"learning_rate": 0.00018479080652611583,
"loss": 0.7192,
"step": 442
},
{
"epoch": 1.01,
"learning_rate": 0.0001847119289019373,
"loss": 0.9608,
"step": 443
},
{
"epoch": 1.02,
"learning_rate": 0.00018463286419478255,
"loss": 0.7097,
"step": 444
},
{
"epoch": 1.02,
"learning_rate": 0.0001845536125792629,
"loss": 0.7354,
"step": 445
},
{
"epoch": 1.02,
"learning_rate": 0.0001844741742304024,
"loss": 0.8711,
"step": 446
},
{
"epoch": 1.02,
"learning_rate": 0.00018439454932363755,
"loss": 0.8832,
"step": 447
},
{
"epoch": 1.03,
"learning_rate": 0.00018431473803481684,
"loss": 0.932,
"step": 448
},
{
"epoch": 1.03,
"learning_rate": 0.00018423474054020034,
"loss": 0.8394,
"step": 449
},
{
"epoch": 1.03,
"learning_rate": 0.00018415455701645942,
"loss": 0.7698,
"step": 450
},
{
"epoch": 1.03,
"learning_rate": 0.00018407418764067627,
"loss": 0.8856,
"step": 451
},
{
"epoch": 1.04,
"learning_rate": 0.00018399363259034347,
"loss": 0.8529,
"step": 452
},
{
"epoch": 1.04,
"learning_rate": 0.00018391289204336368,
"loss": 0.9898,
"step": 453
},
{
"epoch": 1.04,
"learning_rate": 0.00018383196617804926,
"loss": 0.8312,
"step": 454
},
{
"epoch": 1.04,
"learning_rate": 0.00018375085517312182,
"loss": 0.8234,
"step": 455
},
{
"epoch": 1.04,
"learning_rate": 0.00018366955920771184,
"loss": 0.7871,
"step": 456
},
{
"epoch": 1.05,
"learning_rate": 0.00018358807846135825,
"loss": 0.9814,
"step": 457
},
{
"epoch": 1.05,
"learning_rate": 0.00018350641311400812,
"loss": 0.8183,
"step": 458
},
{
"epoch": 1.05,
"learning_rate": 0.0001834245633460161,
"loss": 0.8961,
"step": 459
},
{
"epoch": 1.05,
"learning_rate": 0.00018334252933814427,
"loss": 0.9166,
"step": 460
},
{
"epoch": 1.06,
"learning_rate": 0.00018326031127156148,
"loss": 1.0031,
"step": 461
},
{
"epoch": 1.06,
"learning_rate": 0.00018317790932784317,
"loss": 0.8171,
"step": 462
},
{
"epoch": 1.06,
"learning_rate": 0.0001830953236889707,
"loss": 0.83,
"step": 463
},
{
"epoch": 1.06,
"learning_rate": 0.00018301255453733134,
"loss": 0.8134,
"step": 464
},
{
"epoch": 1.07,
"learning_rate": 0.0001829296020557174,
"loss": 0.8561,
"step": 465
},
{
"epoch": 1.07,
"learning_rate": 0.0001828464664273263,
"loss": 0.8669,
"step": 466
},
{
"epoch": 1.07,
"learning_rate": 0.0001827631478357597,
"loss": 1.003,
"step": 467
},
{
"epoch": 1.07,
"learning_rate": 0.00018267964646502357,
"loss": 0.8715,
"step": 468
},
{
"epoch": 1.07,
"learning_rate": 0.00018259596249952731,
"loss": 0.7434,
"step": 469
},
{
"epoch": 1.08,
"learning_rate": 0.00018251209612408373,
"loss": 0.9163,
"step": 470
},
{
"epoch": 1.08,
"learning_rate": 0.00018242804752390844,
"loss": 1.0639,
"step": 471
},
{
"epoch": 1.08,
"learning_rate": 0.00018234381688461942,
"loss": 0.8266,
"step": 472
},
{
"epoch": 1.08,
"learning_rate": 0.00018225940439223684,
"loss": 0.7582,
"step": 473
},
{
"epoch": 1.09,
"learning_rate": 0.0001821748102331823,
"loss": 0.8547,
"step": 474
},
{
"epoch": 1.09,
"learning_rate": 0.0001820900345942787,
"loss": 0.7908,
"step": 475
},
{
"epoch": 1.09,
"learning_rate": 0.00018200507766274977,
"loss": 0.6203,
"step": 476
},
{
"epoch": 1.09,
"learning_rate": 0.0001819199396262195,
"loss": 0.806,
"step": 477
},
{
"epoch": 1.1,
"learning_rate": 0.0001818346206727119,
"loss": 0.8016,
"step": 478
},
{
"epoch": 1.1,
"learning_rate": 0.0001817491209906506,
"loss": 0.8548,
"step": 479
},
{
"epoch": 1.1,
"learning_rate": 0.00018166344076885827,
"loss": 0.9194,
"step": 480
},
{
"epoch": 1.1,
"learning_rate": 0.00018157758019655634,
"loss": 0.8704,
"step": 481
},
{
"epoch": 1.1,
"learning_rate": 0.00018149153946336446,
"loss": 0.8373,
"step": 482
},
{
"epoch": 1.11,
"learning_rate": 0.0001814053187593003,
"loss": 0.8229,
"step": 483
},
{
"epoch": 1.11,
"learning_rate": 0.00018131891827477884,
"loss": 0.8289,
"step": 484
},
{
"epoch": 1.11,
"learning_rate": 0.00018123233820061218,
"loss": 0.7753,
"step": 485
},
{
"epoch": 1.11,
"learning_rate": 0.00018114557872800905,
"loss": 1.029,
"step": 486
},
{
"epoch": 1.12,
"learning_rate": 0.0001810586400485743,
"loss": 0.6198,
"step": 487
},
{
"epoch": 1.12,
"learning_rate": 0.0001809715223543087,
"loss": 0.8418,
"step": 488
},
{
"epoch": 1.12,
"learning_rate": 0.00018088422583760813,
"loss": 0.7421,
"step": 489
},
{
"epoch": 1.12,
"learning_rate": 0.0001807967506912636,
"loss": 0.8032,
"step": 490
},
{
"epoch": 1.12,
"learning_rate": 0.00018070909710846052,
"loss": 0.7956,
"step": 491
},
{
"epoch": 1.13,
"learning_rate": 0.00018062126528277844,
"loss": 0.9013,
"step": 492
},
{
"epoch": 1.13,
"learning_rate": 0.00018053325540819045,
"loss": 0.9582,
"step": 493
},
{
"epoch": 1.13,
"learning_rate": 0.00018044506767906295,
"loss": 0.6845,
"step": 494
},
{
"epoch": 1.13,
"learning_rate": 0.00018035670229015507,
"loss": 0.8731,
"step": 495
},
{
"epoch": 1.14,
"learning_rate": 0.0001802681594366183,
"loss": 0.8369,
"step": 496
},
{
"epoch": 1.14,
"learning_rate": 0.00018017943931399603,
"loss": 0.6557,
"step": 497
},
{
"epoch": 1.14,
"learning_rate": 0.00018009054211822324,
"loss": 0.7997,
"step": 498
},
{
"epoch": 1.14,
"learning_rate": 0.0001800014680456259,
"loss": 0.8348,
"step": 499
},
{
"epoch": 1.15,
"learning_rate": 0.0001799122172929206,
"loss": 0.9043,
"step": 500
},
{
"epoch": 1.15,
"learning_rate": 0.00017982279005721407,
"loss": 0.8499,
"step": 501
},
{
"epoch": 1.15,
"learning_rate": 0.00017973318653600293,
"loss": 0.8595,
"step": 502
},
{
"epoch": 1.15,
"learning_rate": 0.00017964340692717303,
"loss": 0.9468,
"step": 503
},
{
"epoch": 1.15,
"learning_rate": 0.0001795534514289991,
"loss": 0.9848,
"step": 504
},
{
"epoch": 1.16,
"learning_rate": 0.00017946332024014434,
"loss": 0.7326,
"step": 505
},
{
"epoch": 1.16,
"learning_rate": 0.00017937301355965996,
"loss": 0.8479,
"step": 506
},
{
"epoch": 1.16,
"learning_rate": 0.00017928253158698473,
"loss": 0.8669,
"step": 507
},
{
"epoch": 1.16,
"learning_rate": 0.00017919187452194454,
"loss": 0.8163,
"step": 508
},
{
"epoch": 1.17,
"learning_rate": 0.00017910104256475194,
"loss": 0.926,
"step": 509
},
{
"epoch": 1.17,
"learning_rate": 0.00017901003591600575,
"loss": 0.7956,
"step": 510
},
{
"epoch": 1.17,
"learning_rate": 0.00017891885477669064,
"loss": 0.9002,
"step": 511
},
{
"epoch": 1.17,
"learning_rate": 0.00017882749934817652,
"loss": 0.787,
"step": 512
},
{
"epoch": 1.18,
"learning_rate": 0.00017873596983221832,
"loss": 0.7519,
"step": 513
},
{
"epoch": 1.18,
"learning_rate": 0.0001786442664309554,
"loss": 0.8067,
"step": 514
},
{
"epoch": 1.18,
"learning_rate": 0.00017855238934691108,
"loss": 0.8824,
"step": 515
},
{
"epoch": 1.18,
"learning_rate": 0.0001784603387829923,
"loss": 0.8014,
"step": 516
},
{
"epoch": 1.18,
"learning_rate": 0.00017836811494248919,
"loss": 0.6672,
"step": 517
},
{
"epoch": 1.19,
"learning_rate": 0.00017827571802907444,
"loss": 0.8516,
"step": 518
},
{
"epoch": 1.19,
"learning_rate": 0.000178183148246803,
"loss": 0.8476,
"step": 519
},
{
"epoch": 1.19,
"learning_rate": 0.00017809040580011164,
"loss": 0.8493,
"step": 520
},
{
"epoch": 1.19,
"learning_rate": 0.0001779974908938184,
"loss": 0.7288,
"step": 521
},
{
"epoch": 1.2,
"learning_rate": 0.00017790440373312223,
"loss": 0.7443,
"step": 522
},
{
"epoch": 1.2,
"learning_rate": 0.00017781114452360245,
"loss": 0.8767,
"step": 523
},
{
"epoch": 1.2,
"learning_rate": 0.00017771771347121842,
"loss": 0.8025,
"step": 524
},
{
"epoch": 1.2,
"learning_rate": 0.0001776241107823089,
"loss": 0.8842,
"step": 525
},
{
"epoch": 1.21,
"learning_rate": 0.00017753033666359177,
"loss": 0.9648,
"step": 526
},
{
"epoch": 1.21,
"learning_rate": 0.00017743639132216353,
"loss": 0.7872,
"step": 527
},
{
"epoch": 1.21,
"learning_rate": 0.0001773422749654988,
"loss": 0.9122,
"step": 528
},
{
"epoch": 1.21,
"learning_rate": 0.00017724798780144983,
"loss": 0.7688,
"step": 529
},
{
"epoch": 1.21,
"learning_rate": 0.0001771535300382461,
"loss": 0.8938,
"step": 530
},
{
"epoch": 1.22,
"learning_rate": 0.00017705890188449394,
"loss": 0.7152,
"step": 531
},
{
"epoch": 1.22,
"learning_rate": 0.0001769641035491759,
"loss": 0.7077,
"step": 532
},
{
"epoch": 1.22,
"learning_rate": 0.00017686913524165036,
"loss": 0.8872,
"step": 533
},
{
"epoch": 1.22,
"learning_rate": 0.00017677399717165116,
"loss": 0.8775,
"step": 534
},
{
"epoch": 1.23,
"learning_rate": 0.00017667868954928694,
"loss": 0.8508,
"step": 535
},
{
"epoch": 1.23,
"learning_rate": 0.00017658321258504092,
"loss": 0.8589,
"step": 536
},
{
"epoch": 1.23,
"learning_rate": 0.00017648756648977018,
"loss": 0.6499,
"step": 537
},
{
"epoch": 1.23,
"learning_rate": 0.00017639175147470538,
"loss": 0.8927,
"step": 538
},
{
"epoch": 1.23,
"learning_rate": 0.00017629576775145026,
"loss": 0.8702,
"step": 539
},
{
"epoch": 1.24,
"learning_rate": 0.00017619961553198108,
"loss": 0.7958,
"step": 540
},
{
"epoch": 1.24,
"learning_rate": 0.00017610329502864625,
"loss": 0.8582,
"step": 541
},
{
"epoch": 1.24,
"learning_rate": 0.00017600680645416583,
"loss": 0.7905,
"step": 542
},
{
"epoch": 1.24,
"learning_rate": 0.0001759101500216311,
"loss": 0.7574,
"step": 543
},
{
"epoch": 1.25,
"learning_rate": 0.00017581332594450392,
"loss": 0.861,
"step": 544
},
{
"epoch": 1.25,
"learning_rate": 0.00017571633443661658,
"loss": 0.7682,
"step": 545
},
{
"epoch": 1.25,
"learning_rate": 0.00017561917571217093,
"loss": 0.7547,
"step": 546
},
{
"epoch": 1.25,
"learning_rate": 0.00017552184998573825,
"loss": 0.7852,
"step": 547
},
{
"epoch": 1.26,
"learning_rate": 0.0001754243574722586,
"loss": 0.7635,
"step": 548
},
{
"epoch": 1.26,
"learning_rate": 0.00017532669838704035,
"loss": 0.8714,
"step": 549
},
{
"epoch": 1.26,
"learning_rate": 0.00017522887294575977,
"loss": 0.7839,
"step": 550
},
{
"epoch": 1.26,
"learning_rate": 0.00017513088136446054,
"loss": 0.8551,
"step": 551
},
{
"epoch": 1.26,
"learning_rate": 0.00017503272385955318,
"loss": 0.7367,
"step": 552
},
{
"epoch": 1.27,
"learning_rate": 0.00017493440064781475,
"loss": 0.9257,
"step": 553
},
{
"epoch": 1.27,
"learning_rate": 0.00017483591194638817,
"loss": 0.8246,
"step": 554
},
{
"epoch": 1.27,
"learning_rate": 0.00017473725797278192,
"loss": 0.8319,
"step": 555
},
{
"epoch": 1.27,
"learning_rate": 0.00017463843894486937,
"loss": 0.8304,
"step": 556
},
{
"epoch": 1.28,
"learning_rate": 0.00017453945508088853,
"loss": 0.6536,
"step": 557
},
{
"epoch": 1.28,
"learning_rate": 0.00017444030659944138,
"loss": 0.7606,
"step": 558
},
{
"epoch": 1.28,
"learning_rate": 0.00017434099371949345,
"loss": 0.7084,
"step": 559
},
{
"epoch": 1.28,
"learning_rate": 0.00017424151666037329,
"loss": 0.8891,
"step": 560
},
{
"epoch": 1.29,
"learning_rate": 0.00017414187564177217,
"loss": 0.6199,
"step": 561
},
{
"epoch": 1.29,
"learning_rate": 0.00017404207088374333,
"loss": 0.8676,
"step": 562
},
{
"epoch": 1.29,
"learning_rate": 0.0001739421026067017,
"loss": 0.8477,
"step": 563
},
{
"epoch": 1.29,
"learning_rate": 0.00017384197103142328,
"loss": 0.9234,
"step": 564
},
{
"epoch": 1.29,
"learning_rate": 0.0001737416763790447,
"loss": 0.9103,
"step": 565
},
{
"epoch": 1.3,
"learning_rate": 0.00017364121887106286,
"loss": 0.7859,
"step": 566
},
{
"epoch": 1.3,
"learning_rate": 0.00017354059872933415,
"loss": 0.8623,
"step": 567
},
{
"epoch": 1.3,
"learning_rate": 0.00017343981617607424,
"loss": 0.6266,
"step": 568
},
{
"epoch": 1.3,
"learning_rate": 0.00017333887143385743,
"loss": 0.8105,
"step": 569
},
{
"epoch": 1.31,
"learning_rate": 0.00017323776472561627,
"loss": 0.7752,
"step": 570
},
{
"epoch": 1.31,
"learning_rate": 0.0001731364962746409,
"loss": 0.7873,
"step": 571
},
{
"epoch": 1.31,
"learning_rate": 0.0001730350663045788,
"loss": 0.8425,
"step": 572
},
{
"epoch": 1.31,
"learning_rate": 0.00017293347503943406,
"loss": 0.777,
"step": 573
},
{
"epoch": 1.32,
"learning_rate": 0.000172831722703567,
"loss": 0.7348,
"step": 574
},
{
"epoch": 1.32,
"learning_rate": 0.00017272980952169365,
"loss": 0.7797,
"step": 575
},
{
"epoch": 1.32,
"learning_rate": 0.0001726277357188853,
"loss": 0.8328,
"step": 576
},
{
"epoch": 1.32,
"learning_rate": 0.00017252550152056795,
"loss": 0.7109,
"step": 577
},
{
"epoch": 1.32,
"learning_rate": 0.0001724231071525218,
"loss": 0.7905,
"step": 578
},
{
"epoch": 1.33,
"learning_rate": 0.00017232055284088085,
"loss": 0.7541,
"step": 579
},
{
"epoch": 1.33,
"learning_rate": 0.0001722178388121322,
"loss": 0.8954,
"step": 580
},
{
"epoch": 1.33,
"learning_rate": 0.00017211496529311582,
"loss": 0.8362,
"step": 581
},
{
"epoch": 1.33,
"learning_rate": 0.00017201193251102382,
"loss": 0.8436,
"step": 582
},
{
"epoch": 1.34,
"learning_rate": 0.00017190874069340014,
"loss": 0.7594,
"step": 583
},
{
"epoch": 1.34,
"learning_rate": 0.0001718053900681397,
"loss": 0.9342,
"step": 584
},
{
"epoch": 1.34,
"learning_rate": 0.00017170188086348848,
"loss": 0.8934,
"step": 585
},
{
"epoch": 1.34,
"learning_rate": 0.00017159821330804236,
"loss": 0.831,
"step": 586
},
{
"epoch": 1.34,
"learning_rate": 0.0001714943876307472,
"loss": 0.8053,
"step": 587
},
{
"epoch": 1.35,
"learning_rate": 0.00017139040406089786,
"loss": 0.81,
"step": 588
},
{
"epoch": 1.35,
"learning_rate": 0.000171286262828138,
"loss": 0.8245,
"step": 589
},
{
"epoch": 1.35,
"learning_rate": 0.00017118196416245947,
"loss": 0.8232,
"step": 590
},
{
"epoch": 1.35,
"learning_rate": 0.00017107750829420176,
"loss": 0.8244,
"step": 591
},
{
"epoch": 1.36,
"learning_rate": 0.0001709728954540516,
"loss": 0.7863,
"step": 592
},
{
"epoch": 1.36,
"learning_rate": 0.00017086812587304234,
"loss": 0.8274,
"step": 593
},
{
"epoch": 1.36,
"learning_rate": 0.00017076319978255345,
"loss": 0.6595,
"step": 594
},
{
"epoch": 1.36,
"learning_rate": 0.0001706581174143101,
"loss": 0.8582,
"step": 595
},
{
"epoch": 1.37,
"learning_rate": 0.00017055287900038263,
"loss": 0.6873,
"step": 596
},
{
"epoch": 1.37,
"learning_rate": 0.00017044748477318593,
"loss": 0.8673,
"step": 597
},
{
"epoch": 1.37,
"learning_rate": 0.00017034193496547902,
"loss": 0.8055,
"step": 598
},
{
"epoch": 1.37,
"learning_rate": 0.00017023622981036455,
"loss": 0.8232,
"step": 599
},
{
"epoch": 1.37,
"learning_rate": 0.0001701303695412881,
"loss": 0.8745,
"step": 600
},
{
"epoch": 1.38,
"learning_rate": 0.00017002435439203808,
"loss": 0.8034,
"step": 601
},
{
"epoch": 1.38,
"learning_rate": 0.00016991818459674468,
"loss": 0.9006,
"step": 602
},
{
"epoch": 1.38,
"learning_rate": 0.0001698118603898798,
"loss": 0.7828,
"step": 603
},
{
"epoch": 1.38,
"learning_rate": 0.00016970538200625622,
"loss": 0.8413,
"step": 604
},
{
"epoch": 1.39,
"learning_rate": 0.00016959874968102735,
"loss": 0.8669,
"step": 605
},
{
"epoch": 1.39,
"learning_rate": 0.00016949196364968646,
"loss": 0.9277,
"step": 606
},
{
"epoch": 1.39,
"learning_rate": 0.00016938502414806634,
"loss": 0.9256,
"step": 607
},
{
"epoch": 1.39,
"learning_rate": 0.00016927793141233868,
"loss": 0.8613,
"step": 608
},
{
"epoch": 1.4,
"learning_rate": 0.00016917068567901358,
"loss": 0.9439,
"step": 609
},
{
"epoch": 1.4,
"learning_rate": 0.00016906328718493906,
"loss": 0.8606,
"step": 610
},
{
"epoch": 1.4,
"learning_rate": 0.00016895573616730044,
"loss": 0.7483,
"step": 611
},
{
"epoch": 1.4,
"learning_rate": 0.00016884803286362,
"loss": 0.8359,
"step": 612
},
{
"epoch": 1.4,
"learning_rate": 0.0001687401775117562,
"loss": 0.7764,
"step": 613
},
{
"epoch": 1.41,
"learning_rate": 0.00016863217034990342,
"loss": 0.9857,
"step": 614
},
{
"epoch": 1.41,
"learning_rate": 0.0001685240116165912,
"loss": 0.8706,
"step": 615
},
{
"epoch": 1.41,
"learning_rate": 0.0001684157015506839,
"loss": 0.867,
"step": 616
},
{
"epoch": 1.41,
"learning_rate": 0.00016830724039138003,
"loss": 0.7974,
"step": 617
},
{
"epoch": 1.42,
"learning_rate": 0.00016819862837821181,
"loss": 0.7835,
"step": 618
},
{
"epoch": 1.42,
"learning_rate": 0.00016808986575104465,
"loss": 0.7987,
"step": 619
},
{
"epoch": 1.42,
"learning_rate": 0.0001679809527500765,
"loss": 0.7383,
"step": 620
},
{
"epoch": 1.42,
"learning_rate": 0.0001678718896158375,
"loss": 0.9224,
"step": 621
},
{
"epoch": 1.42,
"learning_rate": 0.00016776267658918928,
"loss": 0.8959,
"step": 622
},
{
"epoch": 1.43,
"learning_rate": 0.00016765331391132456,
"loss": 0.6702,
"step": 623
},
{
"epoch": 1.43,
"learning_rate": 0.0001675438018237665,
"loss": 0.6911,
"step": 624
},
{
"epoch": 1.43,
"learning_rate": 0.00016743414056836825,
"loss": 0.9364,
"step": 625
},
{
"epoch": 1.43,
"learning_rate": 0.00016732433038731242,
"loss": 0.7902,
"step": 626
},
{
"epoch": 1.44,
"learning_rate": 0.00016721437152311054,
"loss": 0.8473,
"step": 627
},
{
"epoch": 1.44,
"learning_rate": 0.00016710426421860235,
"loss": 0.8765,
"step": 628
},
{
"epoch": 1.44,
"learning_rate": 0.00016699400871695555,
"loss": 0.7705,
"step": 629
},
{
"epoch": 1.44,
"learning_rate": 0.00016688360526166514,
"loss": 0.8653,
"step": 630
},
{
"epoch": 1.45,
"learning_rate": 0.0001667730540965528,
"loss": 0.9137,
"step": 631
},
{
"epoch": 1.45,
"learning_rate": 0.00016666235546576648,
"loss": 0.9772,
"step": 632
},
{
"epoch": 1.45,
"learning_rate": 0.0001665515096137797,
"loss": 0.6433,
"step": 633
},
{
"epoch": 1.45,
"learning_rate": 0.0001664405167853912,
"loss": 0.8096,
"step": 634
},
{
"epoch": 1.45,
"learning_rate": 0.00016632937722572434,
"loss": 0.7298,
"step": 635
},
{
"epoch": 1.46,
"learning_rate": 0.00016621809118022647,
"loss": 0.6841,
"step": 636
},
{
"epoch": 1.46,
"learning_rate": 0.00016610665889466838,
"loss": 0.9471,
"step": 637
},
{
"epoch": 1.46,
"learning_rate": 0.00016599508061514404,
"loss": 0.8396,
"step": 638
},
{
"epoch": 1.46,
"learning_rate": 0.00016588335658806962,
"loss": 0.8769,
"step": 639
},
{
"epoch": 1.47,
"learning_rate": 0.00016577148706018328,
"loss": 0.8328,
"step": 640
},
{
"epoch": 1.47,
"learning_rate": 0.0001656594722785445,
"loss": 0.8932,
"step": 641
},
{
"epoch": 1.47,
"learning_rate": 0.0001655473124905335,
"loss": 0.8203,
"step": 642
},
{
"epoch": 1.47,
"learning_rate": 0.00016543500794385084,
"loss": 0.8514,
"step": 643
},
{
"epoch": 1.48,
"learning_rate": 0.00016532255888651666,
"loss": 0.7396,
"step": 644
},
{
"epoch": 1.48,
"learning_rate": 0.00016520996556687028,
"loss": 0.9178,
"step": 645
},
{
"epoch": 1.48,
"learning_rate": 0.0001650972282335697,
"loss": 0.6308,
"step": 646
},
{
"epoch": 1.48,
"learning_rate": 0.00016498434713559088,
"loss": 0.9018,
"step": 647
},
{
"epoch": 1.48,
"learning_rate": 0.00016487132252222727,
"loss": 0.8658,
"step": 648
},
{
"epoch": 1.49,
"learning_rate": 0.00016475815464308933,
"loss": 0.8228,
"step": 649
},
{
"epoch": 1.49,
"learning_rate": 0.0001646448437481039,
"loss": 0.8944,
"step": 650
},
{
"epoch": 1.49,
"learning_rate": 0.0001645313900875136,
"loss": 0.8617,
"step": 651
},
{
"epoch": 1.49,
"learning_rate": 0.00016441779391187646,
"loss": 0.9726,
"step": 652
},
{
"epoch": 1.5,
"learning_rate": 0.00016430405547206516,
"loss": 0.693,
"step": 653
},
{
"epoch": 1.5,
"learning_rate": 0.00016419017501926656,
"loss": 0.8272,
"step": 654
},
{
"epoch": 1.5,
"learning_rate": 0.00016407615280498124,
"loss": 0.8523,
"step": 655
},
{
"epoch": 1.5,
"learning_rate": 0.00016396198908102272,
"loss": 0.7444,
"step": 656
},
{
"epoch": 1.51,
"learning_rate": 0.00016384768409951714,
"loss": 0.8366,
"step": 657
},
{
"epoch": 1.51,
"learning_rate": 0.0001637332381129026,
"loss": 0.7441,
"step": 658
},
{
"epoch": 1.51,
"learning_rate": 0.00016361865137392854,
"loss": 0.6694,
"step": 659
},
{
"epoch": 1.51,
"learning_rate": 0.0001635039241356553,
"loss": 0.8103,
"step": 660
},
{
"epoch": 1.51,
"learning_rate": 0.0001633890566514535,
"loss": 0.9135,
"step": 661
},
{
"epoch": 1.52,
"learning_rate": 0.00016327404917500346,
"loss": 0.7327,
"step": 662
},
{
"epoch": 1.52,
"learning_rate": 0.00016315890196029467,
"loss": 0.8425,
"step": 663
},
{
"epoch": 1.52,
"learning_rate": 0.00016304361526162534,
"loss": 0.8812,
"step": 664
},
{
"epoch": 1.52,
"learning_rate": 0.00016292818933360151,
"loss": 0.777,
"step": 665
},
{
"epoch": 1.53,
"learning_rate": 0.0001628126244311369,
"loss": 0.8864,
"step": 666
},
{
"epoch": 1.53,
"learning_rate": 0.00016269692080945198,
"loss": 0.9333,
"step": 667
},
{
"epoch": 1.53,
"learning_rate": 0.00016258107872407375,
"loss": 0.906,
"step": 668
},
{
"epoch": 1.53,
"learning_rate": 0.00016246509843083492,
"loss": 0.7346,
"step": 669
},
{
"epoch": 1.53,
"learning_rate": 0.00016234898018587337,
"loss": 0.8555,
"step": 670
},
{
"epoch": 1.54,
"learning_rate": 0.00016223272424563173,
"loss": 0.8449,
"step": 671
},
{
"epoch": 1.54,
"learning_rate": 0.00016211633086685664,
"loss": 0.8559,
"step": 672
},
{
"epoch": 1.54,
"learning_rate": 0.00016199980030659838,
"loss": 0.7468,
"step": 673
},
{
"epoch": 1.54,
"learning_rate": 0.00016188313282221008,
"loss": 0.7986,
"step": 674
},
{
"epoch": 1.55,
"learning_rate": 0.0001617663286713474,
"loss": 0.7757,
"step": 675
},
{
"epoch": 1.55,
"learning_rate": 0.00016164938811196757,
"loss": 0.8789,
"step": 676
},
{
"epoch": 1.55,
"learning_rate": 0.00016153231140232936,
"loss": 0.5499,
"step": 677
},
{
"epoch": 1.55,
"learning_rate": 0.00016141509880099206,
"loss": 0.9319,
"step": 678
},
{
"epoch": 1.56,
"learning_rate": 0.00016129775056681513,
"loss": 0.6904,
"step": 679
},
{
"epoch": 1.56,
"learning_rate": 0.0001611802669589575,
"loss": 0.8506,
"step": 680
},
{
"epoch": 1.56,
"learning_rate": 0.00016106264823687716,
"loss": 0.7242,
"step": 681
},
{
"epoch": 1.56,
"learning_rate": 0.00016094489466033043,
"loss": 0.6808,
"step": 682
},
{
"epoch": 1.56,
"learning_rate": 0.00016082700648937146,
"loss": 0.8017,
"step": 683
},
{
"epoch": 1.57,
"learning_rate": 0.00016070898398435167,
"loss": 0.9109,
"step": 684
},
{
"epoch": 1.57,
"learning_rate": 0.00016059082740591915,
"loss": 0.7277,
"step": 685
},
{
"epoch": 1.57,
"learning_rate": 0.00016047253701501808,
"loss": 0.8601,
"step": 686
},
{
"epoch": 1.57,
"learning_rate": 0.00016035411307288813,
"loss": 0.9118,
"step": 687
},
{
"epoch": 1.58,
"learning_rate": 0.0001602355558410639,
"loss": 0.8049,
"step": 688
},
{
"epoch": 1.58,
"learning_rate": 0.00016011686558137448,
"loss": 0.8174,
"step": 689
},
{
"epoch": 1.58,
"learning_rate": 0.00015999804255594258,
"loss": 0.8481,
"step": 690
},
{
"epoch": 1.58,
"learning_rate": 0.0001598790870271843,
"loss": 0.7052,
"step": 691
},
{
"epoch": 1.59,
"learning_rate": 0.00015975999925780813,
"loss": 0.8208,
"step": 692
},
{
"epoch": 1.59,
"learning_rate": 0.00015964077951081485,
"loss": 0.7257,
"step": 693
},
{
"epoch": 1.59,
"learning_rate": 0.00015952142804949652,
"loss": 0.858,
"step": 694
},
{
"epoch": 1.59,
"learning_rate": 0.00015940194513743624,
"loss": 0.9242,
"step": 695
},
{
"epoch": 1.59,
"learning_rate": 0.0001592823310385073,
"loss": 0.7924,
"step": 696
},
{
"epoch": 1.6,
"learning_rate": 0.00015916258601687274,
"loss": 0.8788,
"step": 697
},
{
"epoch": 1.6,
"learning_rate": 0.0001590427103369848,
"loss": 0.7946,
"step": 698
},
{
"epoch": 1.6,
"learning_rate": 0.00015892270426358414,
"loss": 0.8318,
"step": 699
},
{
"epoch": 1.6,
"learning_rate": 0.00015880256806169953,
"loss": 0.8983,
"step": 700
},
{
"epoch": 1.61,
"learning_rate": 0.00015868230199664711,
"loss": 0.8889,
"step": 701
},
{
"epoch": 1.61,
"learning_rate": 0.00015856190633402968,
"loss": 0.9692,
"step": 702
},
{
"epoch": 1.61,
"learning_rate": 0.0001584413813397364,
"loss": 0.7787,
"step": 703
},
{
"epoch": 1.61,
"learning_rate": 0.00015832072727994193,
"loss": 0.6455,
"step": 704
},
{
"epoch": 1.62,
"learning_rate": 0.00015819994442110616,
"loss": 1.0006,
"step": 705
},
{
"epoch": 1.62,
"learning_rate": 0.00015807903302997317,
"loss": 0.7384,
"step": 706
},
{
"epoch": 1.62,
"learning_rate": 0.00015795799337357114,
"loss": 0.8517,
"step": 707
},
{
"epoch": 1.62,
"learning_rate": 0.00015783682571921133,
"loss": 0.8446,
"step": 708
},
{
"epoch": 1.62,
"learning_rate": 0.00015771553033448775,
"loss": 0.8227,
"step": 709
},
{
"epoch": 1.63,
"learning_rate": 0.00015759410748727662,
"loss": 0.8374,
"step": 710
},
{
"epoch": 1.63,
"learning_rate": 0.0001574725574457354,
"loss": 0.7274,
"step": 711
},
{
"epoch": 1.63,
"learning_rate": 0.00015735088047830268,
"loss": 0.8728,
"step": 712
},
{
"epoch": 1.63,
"learning_rate": 0.00015722907685369723,
"loss": 1.0569,
"step": 713
},
{
"epoch": 1.64,
"learning_rate": 0.00015710714684091762,
"loss": 0.9775,
"step": 714
},
{
"epoch": 1.64,
"learning_rate": 0.0001569850907092415,
"loss": 0.6832,
"step": 715
},
{
"epoch": 1.64,
"learning_rate": 0.00015686290872822504,
"loss": 0.7358,
"step": 716
},
{
"epoch": 1.64,
"learning_rate": 0.00015674060116770236,
"loss": 0.9015,
"step": 717
},
{
"epoch": 1.64,
"learning_rate": 0.00015661816829778494,
"loss": 0.8516,
"step": 718
},
{
"epoch": 1.65,
"learning_rate": 0.00015649561038886094,
"loss": 0.8911,
"step": 719
},
{
"epoch": 1.65,
"learning_rate": 0.00015637292771159472,
"loss": 0.7098,
"step": 720
},
{
"epoch": 1.65,
"learning_rate": 0.00015625012053692615,
"loss": 0.955,
"step": 721
},
{
"epoch": 1.65,
"learning_rate": 0.0001561271891360701,
"loss": 0.6421,
"step": 722
},
{
"epoch": 1.66,
"learning_rate": 0.0001560041337805157,
"loss": 0.8807,
"step": 723
},
{
"epoch": 1.66,
"learning_rate": 0.00015588095474202595,
"loss": 0.722,
"step": 724
},
{
"epoch": 1.66,
"learning_rate": 0.00015575765229263686,
"loss": 0.8055,
"step": 725
},
{
"epoch": 1.66,
"learning_rate": 0.00015563422670465712,
"loss": 0.7822,
"step": 726
},
{
"epoch": 1.67,
"learning_rate": 0.00015551067825066728,
"loss": 0.8311,
"step": 727
},
{
"epoch": 1.67,
"learning_rate": 0.00015538700720351924,
"loss": 0.8519,
"step": 728
},
{
"epoch": 1.67,
"learning_rate": 0.00015526321383633568,
"loss": 0.7506,
"step": 729
},
{
"epoch": 1.67,
"learning_rate": 0.0001551392984225094,
"loss": 0.8056,
"step": 730
},
{
"epoch": 1.67,
"learning_rate": 0.00015501526123570277,
"loss": 0.6968,
"step": 731
},
{
"epoch": 1.68,
"learning_rate": 0.000154891102549847,
"loss": 0.829,
"step": 732
},
{
"epoch": 1.68,
"learning_rate": 0.0001547668226391417,
"loss": 0.6682,
"step": 733
},
{
"epoch": 1.68,
"learning_rate": 0.00015464242177805422,
"loss": 0.8295,
"step": 734
},
{
"epoch": 1.68,
"learning_rate": 0.00015451790024131895,
"loss": 0.6911,
"step": 735
},
{
"epoch": 1.69,
"learning_rate": 0.00015439325830393687,
"loss": 0.6785,
"step": 736
},
{
"epoch": 1.69,
"learning_rate": 0.00015426849624117472,
"loss": 0.81,
"step": 737
},
{
"epoch": 1.69,
"learning_rate": 0.00015414361432856475,
"loss": 0.9955,
"step": 738
},
{
"epoch": 1.69,
"learning_rate": 0.00015401861284190368,
"loss": 0.8433,
"step": 739
},
{
"epoch": 1.7,
"learning_rate": 0.00015389349205725242,
"loss": 0.618,
"step": 740
},
{
"epoch": 1.7,
"learning_rate": 0.00015376825225093537,
"loss": 0.7747,
"step": 741
},
{
"epoch": 1.7,
"learning_rate": 0.00015364289369953967,
"loss": 0.7673,
"step": 742
},
{
"epoch": 1.7,
"learning_rate": 0.0001535174166799148,
"loss": 0.8066,
"step": 743
},
{
"epoch": 1.7,
"learning_rate": 0.00015339182146917183,
"loss": 0.8392,
"step": 744
},
{
"epoch": 1.71,
"learning_rate": 0.0001532661083446829,
"loss": 0.7949,
"step": 745
},
{
"epoch": 1.71,
"learning_rate": 0.00015314027758408044,
"loss": 0.8698,
"step": 746
},
{
"epoch": 1.71,
"learning_rate": 0.00015301432946525684,
"loss": 0.7715,
"step": 747
},
{
"epoch": 1.71,
"learning_rate": 0.00015288826426636354,
"loss": 0.7583,
"step": 748
},
{
"epoch": 1.72,
"learning_rate": 0.00015276208226581064,
"loss": 0.8544,
"step": 749
},
{
"epoch": 1.72,
"learning_rate": 0.00015263578374226605,
"loss": 0.8272,
"step": 750
},
{
"epoch": 1.72,
"learning_rate": 0.0001525093689746552,
"loss": 0.857,
"step": 751
},
{
"epoch": 1.72,
"learning_rate": 0.00015238283824216015,
"loss": 0.9208,
"step": 752
},
{
"epoch": 1.73,
"learning_rate": 0.000152256191824219,
"loss": 0.8626,
"step": 753
},
{
"epoch": 1.73,
"learning_rate": 0.00015212943000052545,
"loss": 0.9418,
"step": 754
},
{
"epoch": 1.73,
"learning_rate": 0.00015200255305102803,
"loss": 0.8087,
"step": 755
},
{
"epoch": 1.73,
"learning_rate": 0.00015187556125592945,
"loss": 0.7913,
"step": 756
},
{
"epoch": 1.73,
"learning_rate": 0.00015174845489568622,
"loss": 0.8973,
"step": 757
},
{
"epoch": 1.74,
"learning_rate": 0.00015162123425100762,
"loss": 0.701,
"step": 758
},
{
"epoch": 1.74,
"learning_rate": 0.00015149389960285558,
"loss": 0.898,
"step": 759
},
{
"epoch": 1.74,
"learning_rate": 0.00015136645123244366,
"loss": 0.8809,
"step": 760
},
{
"epoch": 1.74,
"learning_rate": 0.00015123888942123652,
"loss": 0.7334,
"step": 761
},
{
"epoch": 1.75,
"learning_rate": 0.0001511112144509495,
"loss": 0.8506,
"step": 762
},
{
"epoch": 1.75,
"learning_rate": 0.00015098342660354775,
"loss": 0.8469,
"step": 763
},
{
"epoch": 1.75,
"learning_rate": 0.0001508555261612457,
"loss": 1.0353,
"step": 764
},
{
"epoch": 1.75,
"learning_rate": 0.0001507275134065065,
"loss": 0.6269,
"step": 765
},
{
"epoch": 1.75,
"learning_rate": 0.00015059938862204127,
"loss": 0.7825,
"step": 766
},
{
"epoch": 1.76,
"learning_rate": 0.0001504711520908086,
"loss": 0.8388,
"step": 767
},
{
"epoch": 1.76,
"learning_rate": 0.00015034280409601385,
"loss": 0.7383,
"step": 768
},
{
"epoch": 1.76,
"learning_rate": 0.00015021434492110852,
"loss": 0.8029,
"step": 769
},
{
"epoch": 1.76,
"learning_rate": 0.00015008577484978966,
"loss": 0.6527,
"step": 770
},
{
"epoch": 1.77,
"learning_rate": 0.00014995709416599926,
"loss": 0.9434,
"step": 771
},
{
"epoch": 1.77,
"learning_rate": 0.00014982830315392358,
"loss": 0.753,
"step": 772
},
{
"epoch": 1.77,
"learning_rate": 0.00014969940209799248,
"loss": 0.8143,
"step": 773
},
{
"epoch": 1.77,
"learning_rate": 0.00014957039128287892,
"loss": 0.8939,
"step": 774
},
{
"epoch": 1.78,
"learning_rate": 0.0001494412709934982,
"loss": 0.9265,
"step": 775
},
{
"epoch": 1.78,
"learning_rate": 0.00014931204151500747,
"loss": 0.8261,
"step": 776
},
{
"epoch": 1.78,
"learning_rate": 0.00014918270313280495,
"loss": 0.8555,
"step": 777
},
{
"epoch": 1.78,
"learning_rate": 0.00014905325613252937,
"loss": 0.8191,
"step": 778
},
{
"epoch": 1.78,
"learning_rate": 0.00014892370080005936,
"loss": 0.9159,
"step": 779
},
{
"epoch": 1.79,
"learning_rate": 0.00014879403742151283,
"loss": 0.7936,
"step": 780
},
{
"epoch": 1.79,
"learning_rate": 0.00014866426628324625,
"loss": 0.8782,
"step": 781
},
{
"epoch": 1.79,
"learning_rate": 0.00014853438767185412,
"loss": 0.6078,
"step": 782
},
{
"epoch": 1.79,
"learning_rate": 0.0001484044018741682,
"loss": 0.7182,
"step": 783
},
{
"epoch": 1.8,
"learning_rate": 0.00014827430917725712,
"loss": 0.7528,
"step": 784
},
{
"epoch": 1.8,
"learning_rate": 0.00014814410986842543,
"loss": 0.902,
"step": 785
},
{
"epoch": 1.8,
"learning_rate": 0.00014801380423521324,
"loss": 0.8765,
"step": 786
},
{
"epoch": 1.8,
"learning_rate": 0.00014788339256539544,
"loss": 0.6332,
"step": 787
},
{
"epoch": 1.81,
"learning_rate": 0.00014775287514698105,
"loss": 0.7258,
"step": 788
},
{
"epoch": 1.81,
"learning_rate": 0.00014762225226821273,
"loss": 0.7754,
"step": 789
},
{
"epoch": 1.81,
"learning_rate": 0.00014749152421756595,
"loss": 0.7039,
"step": 790
},
{
"epoch": 1.81,
"learning_rate": 0.0001473606912837485,
"loss": 0.8563,
"step": 791
},
{
"epoch": 1.81,
"learning_rate": 0.00014722975375569978,
"loss": 0.8956,
"step": 792
},
{
"epoch": 1.82,
"learning_rate": 0.00014709871192259026,
"loss": 0.8724,
"step": 793
},
{
"epoch": 1.82,
"learning_rate": 0.0001469675660738206,
"loss": 0.8885,
"step": 794
},
{
"epoch": 1.82,
"learning_rate": 0.00014683631649902132,
"loss": 0.7637,
"step": 795
},
{
"epoch": 1.82,
"learning_rate": 0.00014670496348805195,
"loss": 0.7596,
"step": 796
},
{
"epoch": 1.83,
"learning_rate": 0.00014657350733100047,
"loss": 0.8221,
"step": 797
},
{
"epoch": 1.83,
"learning_rate": 0.00014644194831818266,
"loss": 0.8475,
"step": 798
},
{
"epoch": 1.83,
"learning_rate": 0.00014631028674014142,
"loss": 0.7966,
"step": 799
},
{
"epoch": 1.83,
"learning_rate": 0.00014617852288764625,
"loss": 0.9186,
"step": 800
},
{
"epoch": 1.84,
"learning_rate": 0.00014604665705169237,
"loss": 0.9027,
"step": 801
},
{
"epoch": 1.84,
"learning_rate": 0.0001459146895235004,
"loss": 0.9357,
"step": 802
},
{
"epoch": 1.84,
"learning_rate": 0.00014578262059451537,
"loss": 0.9202,
"step": 803
},
{
"epoch": 1.84,
"learning_rate": 0.00014565045055640638,
"loss": 0.9226,
"step": 804
},
{
"epoch": 1.84,
"learning_rate": 0.0001455181797010658,
"loss": 0.8416,
"step": 805
},
{
"epoch": 1.85,
"learning_rate": 0.0001453858083206086,
"loss": 0.8192,
"step": 806
},
{
"epoch": 1.85,
"learning_rate": 0.0001452533367073718,
"loss": 0.8309,
"step": 807
},
{
"epoch": 1.85,
"learning_rate": 0.00014512076515391375,
"loss": 0.7646,
"step": 808
},
{
"epoch": 1.85,
"learning_rate": 0.00014498809395301356,
"loss": 0.9335,
"step": 809
},
{
"epoch": 1.86,
"learning_rate": 0.00014485532339767037,
"loss": 0.9696,
"step": 810
},
{
"epoch": 1.86,
"learning_rate": 0.00014472245378110277,
"loss": 0.7,
"step": 811
},
{
"epoch": 1.86,
"learning_rate": 0.000144589485396748,
"loss": 0.8206,
"step": 812
},
{
"epoch": 1.86,
"learning_rate": 0.0001444564185382617,
"loss": 0.7417,
"step": 813
},
{
"epoch": 1.86,
"learning_rate": 0.00014432325349951667,
"loss": 0.6384,
"step": 814
},
{
"epoch": 1.87,
"learning_rate": 0.00014418999057460276,
"loss": 0.7801,
"step": 815
},
{
"epoch": 1.87,
"learning_rate": 0.0001440566300578259,
"loss": 0.8459,
"step": 816
},
{
"epoch": 1.87,
"learning_rate": 0.0001439231722437075,
"loss": 0.8863,
"step": 817
},
{
"epoch": 1.87,
"learning_rate": 0.000143789617426984,
"loss": 0.8502,
"step": 818
},
{
"epoch": 1.88,
"learning_rate": 0.000143655965902606,
"loss": 0.8522,
"step": 819
},
{
"epoch": 1.88,
"learning_rate": 0.00014352221796573757,
"loss": 0.8612,
"step": 820
},
{
"epoch": 1.88,
"learning_rate": 0.00014338837391175582,
"loss": 0.8065,
"step": 821
},
{
"epoch": 1.88,
"learning_rate": 0.0001432544340362501,
"loss": 0.8777,
"step": 822
},
{
"epoch": 1.89,
"learning_rate": 0.00014312039863502145,
"loss": 0.7731,
"step": 823
},
{
"epoch": 1.89,
"learning_rate": 0.00014298626800408166,
"loss": 0.8791,
"step": 824
},
{
"epoch": 1.89,
"learning_rate": 0.00014285204243965306,
"loss": 0.9095,
"step": 825
},
{
"epoch": 1.89,
"learning_rate": 0.00014271772223816757,
"loss": 0.8846,
"step": 826
},
{
"epoch": 1.89,
"learning_rate": 0.00014258330769626606,
"loss": 0.701,
"step": 827
},
{
"epoch": 1.9,
"learning_rate": 0.00014244879911079779,
"loss": 0.7598,
"step": 828
},
{
"epoch": 1.9,
"learning_rate": 0.00014231419677881966,
"loss": 1.0411,
"step": 829
},
{
"epoch": 1.9,
"learning_rate": 0.00014217950099759569,
"loss": 0.6915,
"step": 830
},
{
"epoch": 1.9,
"learning_rate": 0.00014204471206459628,
"loss": 0.8048,
"step": 831
},
{
"epoch": 1.91,
"learning_rate": 0.0001419098302774974,
"loss": 0.7688,
"step": 832
},
{
"epoch": 1.91,
"learning_rate": 0.00014177485593418028,
"loss": 0.7863,
"step": 833
},
{
"epoch": 1.91,
"learning_rate": 0.0001416397893327304,
"loss": 0.7627,
"step": 834
},
{
"epoch": 1.91,
"learning_rate": 0.00014150463077143712,
"loss": 0.7423,
"step": 835
},
{
"epoch": 1.92,
"learning_rate": 0.00014136938054879283,
"loss": 0.7236,
"step": 836
},
{
"epoch": 1.92,
"learning_rate": 0.00014123403896349227,
"loss": 0.8978,
"step": 837
},
{
"epoch": 1.92,
"learning_rate": 0.00014109860631443213,
"loss": 0.9403,
"step": 838
},
{
"epoch": 1.92,
"learning_rate": 0.00014096308290071003,
"loss": 0.7267,
"step": 839
},
{
"epoch": 1.92,
"learning_rate": 0.00014082746902162414,
"loss": 0.7905,
"step": 840
},
{
"epoch": 1.93,
"learning_rate": 0.00014069176497667242,
"loss": 0.8848,
"step": 841
},
{
"epoch": 1.93,
"learning_rate": 0.00014055597106555192,
"loss": 0.9057,
"step": 842
},
{
"epoch": 1.93,
"learning_rate": 0.00014042008758815818,
"loss": 0.7363,
"step": 843
},
{
"epoch": 1.93,
"learning_rate": 0.00014028411484458454,
"loss": 0.8193,
"step": 844
},
{
"epoch": 1.94,
"learning_rate": 0.00014014805313512145,
"loss": 0.7387,
"step": 845
},
{
"epoch": 1.94,
"learning_rate": 0.00014001190276025593,
"loss": 0.8871,
"step": 846
},
{
"epoch": 1.94,
"learning_rate": 0.0001398756640206707,
"loss": 0.7342,
"step": 847
},
{
"epoch": 1.94,
"learning_rate": 0.00013973933721724363,
"loss": 0.8557,
"step": 848
},
{
"epoch": 1.95,
"learning_rate": 0.0001396029226510472,
"loss": 0.8778,
"step": 849
},
{
"epoch": 1.95,
"learning_rate": 0.00013946642062334766,
"loss": 0.7844,
"step": 850
},
{
"epoch": 1.95,
"learning_rate": 0.00013932983143560433,
"loss": 0.7941,
"step": 851
},
{
"epoch": 1.95,
"learning_rate": 0.00013919315538946905,
"loss": 0.7505,
"step": 852
},
{
"epoch": 1.95,
"learning_rate": 0.0001390563927867856,
"loss": 0.8371,
"step": 853
},
{
"epoch": 1.96,
"learning_rate": 0.00013891954392958878,
"loss": 0.8128,
"step": 854
},
{
"epoch": 1.96,
"learning_rate": 0.0001387826091201039,
"loss": 0.7127,
"step": 855
},
{
"epoch": 1.96,
"learning_rate": 0.00013864558866074622,
"loss": 0.8165,
"step": 856
},
{
"epoch": 1.96,
"learning_rate": 0.00013850848285411994,
"loss": 0.7103,
"step": 857
},
{
"epoch": 1.97,
"learning_rate": 0.00013837129200301794,
"loss": 0.8373,
"step": 858
},
{
"epoch": 1.97,
"learning_rate": 0.00013823401641042084,
"loss": 0.6908,
"step": 859
},
{
"epoch": 1.97,
"learning_rate": 0.00013809665637949637,
"loss": 0.7358,
"step": 860
},
{
"epoch": 1.97,
"learning_rate": 0.00013795921221359877,
"loss": 0.7545,
"step": 861
},
{
"epoch": 1.97,
"learning_rate": 0.00013782168421626816,
"loss": 0.7681,
"step": 862
},
{
"epoch": 1.98,
"learning_rate": 0.00013768407269122967,
"loss": 1.026,
"step": 863
},
{
"epoch": 1.98,
"learning_rate": 0.000137546377942393,
"loss": 0.761,
"step": 864
},
{
"epoch": 1.98,
"learning_rate": 0.0001374086002738516,
"loss": 0.8442,
"step": 865
},
{
"epoch": 1.98,
"learning_rate": 0.00013727073998988202,
"loss": 0.7959,
"step": 866
},
{
"epoch": 1.99,
"learning_rate": 0.00013713279739494333,
"loss": 0.8061,
"step": 867
},
{
"epoch": 1.99,
"learning_rate": 0.00013699477279367636,
"loss": 0.7434,
"step": 868
},
{
"epoch": 1.99,
"learning_rate": 0.000136856666490903,
"loss": 0.7159,
"step": 869
},
{
"epoch": 1.99,
"learning_rate": 0.00013671847879162562,
"loss": 0.867,
"step": 870
},
{
"epoch": 2.0,
"learning_rate": 0.00013658021000102636,
"loss": 0.9237,
"step": 871
},
{
"epoch": 2.0,
"learning_rate": 0.0001364418604244664,
"loss": 0.8545,
"step": 872
},
{
"epoch": 2.0,
"learning_rate": 0.00013630343036748535,
"loss": 0.893,
"step": 873
},
{
"epoch": 2.0,
"learning_rate": 0.00013616492013580062,
"loss": 0.9858,
"step": 874
},
{
"epoch": 2.0,
"learning_rate": 0.0001360263300353066,
"loss": 0.6643,
"step": 875
},
{
"epoch": 2.01,
"learning_rate": 0.0001358876603720741,
"loss": 0.8081,
"step": 876
},
{
"epoch": 2.01,
"learning_rate": 0.00013574891145234962,
"loss": 0.7287,
"step": 877
},
{
"epoch": 2.01,
"learning_rate": 0.00013561008358255468,
"loss": 0.8078,
"step": 878
},
{
"epoch": 2.01,
"learning_rate": 0.0001354711770692853,
"loss": 0.6738,
"step": 879
},
{
"epoch": 2.02,
"learning_rate": 0.00013533219221931102,
"loss": 0.7508,
"step": 880
},
{
"epoch": 2.02,
"learning_rate": 0.0001351931293395744,
"loss": 0.8724,
"step": 881
},
{
"epoch": 2.02,
"learning_rate": 0.0001350539887371904,
"loss": 0.9317,
"step": 882
},
{
"epoch": 2.02,
"learning_rate": 0.00013491477071944557,
"loss": 0.7664,
"step": 883
},
{
"epoch": 2.03,
"learning_rate": 0.00013477547559379748,
"loss": 0.8065,
"step": 884
},
{
"epoch": 2.03,
"learning_rate": 0.00013463610366787392,
"loss": 0.738,
"step": 885
},
{
"epoch": 2.03,
"learning_rate": 0.00013449665524947234,
"loss": 0.7554,
"step": 886
},
{
"epoch": 2.03,
"learning_rate": 0.00013435713064655912,
"loss": 0.7769,
"step": 887
},
{
"epoch": 2.03,
"learning_rate": 0.00013421753016726887,
"loss": 0.6507,
"step": 888
},
{
"epoch": 2.04,
"learning_rate": 0.0001340778541199038,
"loss": 0.7293,
"step": 889
},
{
"epoch": 2.04,
"learning_rate": 0.00013393810281293292,
"loss": 0.8305,
"step": 890
},
{
"epoch": 2.04,
"learning_rate": 0.00013379827655499163,
"loss": 0.7553,
"step": 891
},
{
"epoch": 2.04,
"learning_rate": 0.00013365837565488064,
"loss": 0.7724,
"step": 892
},
{
"epoch": 2.05,
"learning_rate": 0.00013351840042156565,
"loss": 0.7061,
"step": 893
},
{
"epoch": 2.05,
"learning_rate": 0.00013337835116417648,
"loss": 0.7078,
"step": 894
},
{
"epoch": 2.05,
"learning_rate": 0.00013323822819200643,
"loss": 0.8201,
"step": 895
},
{
"epoch": 2.05,
"learning_rate": 0.00013309803181451156,
"loss": 0.746,
"step": 896
},
{
"epoch": 2.05,
"learning_rate": 0.00013295776234131015,
"loss": 0.8276,
"step": 897
},
{
"epoch": 2.06,
"learning_rate": 0.0001328174200821817,
"loss": 0.7922,
"step": 898
},
{
"epoch": 2.06,
"learning_rate": 0.0001326770053470668,
"loss": 0.7577,
"step": 899
},
{
"epoch": 2.06,
"learning_rate": 0.00013253651844606572,
"loss": 0.8217,
"step": 900
},
{
"epoch": 2.06,
"learning_rate": 0.00013239595968943832,
"loss": 0.7883,
"step": 901
},
{
"epoch": 2.07,
"learning_rate": 0.00013225532938760317,
"loss": 0.9568,
"step": 902
},
{
"epoch": 2.07,
"learning_rate": 0.00013211462785113666,
"loss": 0.7348,
"step": 903
},
{
"epoch": 2.07,
"learning_rate": 0.00013197385539077275,
"loss": 0.7558,
"step": 904
},
{
"epoch": 2.07,
"learning_rate": 0.00013183301231740183,
"loss": 0.7066,
"step": 905
},
{
"epoch": 2.08,
"learning_rate": 0.0001316920989420703,
"loss": 0.7663,
"step": 906
},
{
"epoch": 2.08,
"learning_rate": 0.00013155111557597985,
"loss": 0.79,
"step": 907
},
{
"epoch": 2.08,
"learning_rate": 0.00013141006253048672,
"loss": 0.8237,
"step": 908
},
{
"epoch": 2.08,
"learning_rate": 0.0001312689401171011,
"loss": 0.687,
"step": 909
},
{
"epoch": 2.08,
"learning_rate": 0.00013112774864748621,
"loss": 0.8254,
"step": 910
},
{
"epoch": 2.09,
"learning_rate": 0.0001309864884334579,
"loss": 0.7641,
"step": 911
},
{
"epoch": 2.09,
"learning_rate": 0.0001308451597869839,
"loss": 0.7845,
"step": 912
},
{
"epoch": 2.09,
"learning_rate": 0.00013070376302018287,
"loss": 0.8661,
"step": 913
},
{
"epoch": 2.09,
"learning_rate": 0.0001305622984453241,
"loss": 0.9001,
"step": 914
},
{
"epoch": 2.1,
"learning_rate": 0.00013042076637482654,
"loss": 0.7261,
"step": 915
},
{
"epoch": 2.1,
"learning_rate": 0.00013027916712125826,
"loss": 0.7954,
"step": 916
},
{
"epoch": 2.1,
"learning_rate": 0.0001301375009973356,
"loss": 0.792,
"step": 917
},
{
"epoch": 2.1,
"learning_rate": 0.00012999576831592273,
"loss": 0.8423,
"step": 918
},
{
"epoch": 2.11,
"learning_rate": 0.00012985396939003065,
"loss": 0.8529,
"step": 919
},
{
"epoch": 2.11,
"learning_rate": 0.00012971210453281674,
"loss": 0.9086,
"step": 920
},
{
"epoch": 2.11,
"learning_rate": 0.00012957017405758401,
"loss": 0.7099,
"step": 921
},
{
"epoch": 2.11,
"learning_rate": 0.00012942817827778038,
"loss": 0.7515,
"step": 922
},
{
"epoch": 2.11,
"learning_rate": 0.00012928611750699783,
"loss": 0.7972,
"step": 923
},
{
"epoch": 2.12,
"learning_rate": 0.0001291439920589722,
"loss": 0.6615,
"step": 924
},
{
"epoch": 2.12,
"learning_rate": 0.00012900180224758185,
"loss": 0.8229,
"step": 925
},
{
"epoch": 2.12,
"learning_rate": 0.00012885954838684743,
"loss": 0.8146,
"step": 926
},
{
"epoch": 2.12,
"learning_rate": 0.000128717230790931,
"loss": 0.8941,
"step": 927
},
{
"epoch": 2.13,
"learning_rate": 0.00012857484977413545,
"loss": 0.7661,
"step": 928
},
{
"epoch": 2.13,
"learning_rate": 0.00012843240565090365,
"loss": 0.7404,
"step": 929
},
{
"epoch": 2.13,
"learning_rate": 0.00012828989873581785,
"loss": 0.7971,
"step": 930
},
{
"epoch": 2.13,
"learning_rate": 0.000128147329343599,
"loss": 0.6813,
"step": 931
},
{
"epoch": 2.14,
"learning_rate": 0.00012800469778910601,
"loss": 0.7704,
"step": 932
},
{
"epoch": 2.14,
"learning_rate": 0.0001278620043873351,
"loss": 0.7751,
"step": 933
},
{
"epoch": 2.14,
"learning_rate": 0.00012771924945341906,
"loss": 0.841,
"step": 934
},
{
"epoch": 2.14,
"learning_rate": 0.00012757643330262657,
"loss": 0.858,
"step": 935
},
{
"epoch": 2.14,
"learning_rate": 0.00012743355625036143,
"loss": 0.6657,
"step": 936
},
{
"epoch": 2.15,
"learning_rate": 0.00012729061861216213,
"loss": 0.7735,
"step": 937
},
{
"epoch": 2.15,
"learning_rate": 0.00012714762070370077,
"loss": 0.8935,
"step": 938
},
{
"epoch": 2.15,
"learning_rate": 0.00012700456284078264,
"loss": 0.9684,
"step": 939
},
{
"epoch": 2.15,
"learning_rate": 0.0001268614453393454,
"loss": 0.9117,
"step": 940
},
{
"epoch": 2.16,
"learning_rate": 0.00012671826851545851,
"loss": 0.7613,
"step": 941
},
{
"epoch": 2.16,
"learning_rate": 0.00012657503268532236,
"loss": 0.9567,
"step": 942
},
{
"epoch": 2.16,
"learning_rate": 0.00012643173816526764,
"loss": 0.8725,
"step": 943
},
{
"epoch": 2.16,
"learning_rate": 0.00012628838527175464,
"loss": 0.8088,
"step": 944
},
{
"epoch": 2.16,
"learning_rate": 0.00012614497432137273,
"loss": 0.7655,
"step": 945
},
{
"epoch": 2.17,
"learning_rate": 0.00012600150563083927,
"loss": 0.7585,
"step": 946
},
{
"epoch": 2.17,
"learning_rate": 0.0001258579795169993,
"loss": 0.6351,
"step": 947
},
{
"epoch": 2.17,
"learning_rate": 0.0001257143962968246,
"loss": 0.8408,
"step": 948
},
{
"epoch": 2.17,
"learning_rate": 0.00012557075628741307,
"loss": 0.7144,
"step": 949
},
{
"epoch": 2.18,
"learning_rate": 0.00012542705980598813,
"loss": 0.7022,
"step": 950
},
{
"epoch": 2.18,
"learning_rate": 0.00012528330716989769,
"loss": 0.8635,
"step": 951
},
{
"epoch": 2.18,
"learning_rate": 0.0001251394986966139,
"loss": 0.8489,
"step": 952
},
{
"epoch": 2.18,
"learning_rate": 0.00012499563470373212,
"loss": 0.7563,
"step": 953
},
{
"epoch": 2.19,
"learning_rate": 0.00012485171550897037,
"loss": 0.9245,
"step": 954
},
{
"epoch": 2.19,
"learning_rate": 0.00012470774143016853,
"loss": 0.9168,
"step": 955
},
{
"epoch": 2.19,
"learning_rate": 0.0001245637127852877,
"loss": 0.803,
"step": 956
},
{
"epoch": 2.19,
"learning_rate": 0.00012441962989240952,
"loss": 0.722,
"step": 957
},
{
"epoch": 2.19,
"learning_rate": 0.0001242754930697354,
"loss": 0.7944,
"step": 958
},
{
"epoch": 2.2,
"learning_rate": 0.00012413130263558587,
"loss": 0.7759,
"step": 959
},
{
"epoch": 2.2,
"learning_rate": 0.00012398705890839988,
"loss": 0.9407,
"step": 960
},
{
"epoch": 2.2,
"learning_rate": 0.00012384276220673402,
"loss": 0.726,
"step": 961
},
{
"epoch": 2.2,
"learning_rate": 0.00012369841284926188,
"loss": 0.7817,
"step": 962
},
{
"epoch": 2.21,
"learning_rate": 0.00012355401115477345,
"loss": 0.6845,
"step": 963
},
{
"epoch": 2.21,
"learning_rate": 0.00012340955744217412,
"loss": 0.7638,
"step": 964
},
{
"epoch": 2.21,
"learning_rate": 0.0001232650520304843,
"loss": 0.8104,
"step": 965
},
{
"epoch": 2.21,
"learning_rate": 0.00012312049523883852,
"loss": 0.8676,
"step": 966
},
{
"epoch": 2.22,
"learning_rate": 0.0001229758873864848,
"loss": 0.7944,
"step": 967
},
{
"epoch": 2.22,
"learning_rate": 0.00012283122879278393,
"loss": 0.8001,
"step": 968
},
{
"epoch": 2.22,
"learning_rate": 0.00012268651977720866,
"loss": 0.7943,
"step": 969
},
{
"epoch": 2.22,
"learning_rate": 0.0001225417606593433,
"loss": 0.9679,
"step": 970
},
{
"epoch": 2.22,
"learning_rate": 0.00012239695175888263,
"loss": 0.773,
"step": 971
},
{
"epoch": 2.23,
"learning_rate": 0.00012225209339563145,
"loss": 0.7707,
"step": 972
},
{
"epoch": 2.23,
"learning_rate": 0.00012210718588950376,
"loss": 0.6727,
"step": 973
},
{
"epoch": 2.23,
"learning_rate": 0.00012196222956052214,
"loss": 0.7641,
"step": 974
},
{
"epoch": 2.23,
"learning_rate": 0.00012181722472881697,
"loss": 0.8506,
"step": 975
},
{
"epoch": 2.24,
"learning_rate": 0.00012167217171462566,
"loss": 0.8442,
"step": 976
},
{
"epoch": 2.24,
"learning_rate": 0.00012152707083829217,
"loss": 0.7853,
"step": 977
},
{
"epoch": 2.24,
"learning_rate": 0.00012138192242026614,
"loss": 0.7495,
"step": 978
},
{
"epoch": 2.24,
"learning_rate": 0.0001212367267811021,
"loss": 0.739,
"step": 979
},
{
"epoch": 2.25,
"learning_rate": 0.00012109148424145898,
"loss": 0.6531,
"step": 980
},
{
"epoch": 2.25,
"learning_rate": 0.00012094619512209915,
"loss": 0.7721,
"step": 981
},
{
"epoch": 2.25,
"learning_rate": 0.00012080085974388802,
"loss": 0.7346,
"step": 982
},
{
"epoch": 2.25,
"learning_rate": 0.0001206554784277931,
"loss": 0.8709,
"step": 983
},
{
"epoch": 2.25,
"learning_rate": 0.00012051005149488326,
"loss": 0.8111,
"step": 984
},
{
"epoch": 2.26,
"learning_rate": 0.0001203645792663282,
"loss": 0.8296,
"step": 985
},
{
"epoch": 2.26,
"learning_rate": 0.00012021906206339766,
"loss": 0.7569,
"step": 986
},
{
"epoch": 2.26,
"learning_rate": 0.00012007350020746068,
"loss": 0.7945,
"step": 987
},
{
"epoch": 2.26,
"learning_rate": 0.00011992789401998492,
"loss": 0.7818,
"step": 988
},
{
"epoch": 2.27,
"learning_rate": 0.00011978224382253589,
"loss": 0.59,
"step": 989
},
{
"epoch": 2.27,
"learning_rate": 0.00011963654993677645,
"loss": 0.828,
"step": 990
},
{
"epoch": 2.27,
"learning_rate": 0.00011949081268446571,
"loss": 0.7583,
"step": 991
},
{
"epoch": 2.27,
"learning_rate": 0.00011934503238745878,
"loss": 0.7453,
"step": 992
},
{
"epoch": 2.27,
"learning_rate": 0.00011919920936770568,
"loss": 0.826,
"step": 993
},
{
"epoch": 2.28,
"learning_rate": 0.00011905334394725085,
"loss": 0.7673,
"step": 994
},
{
"epoch": 2.28,
"learning_rate": 0.00011890743644823242,
"loss": 0.9637,
"step": 995
},
{
"epoch": 2.28,
"learning_rate": 0.00011876148719288128,
"loss": 0.702,
"step": 996
},
{
"epoch": 2.28,
"learning_rate": 0.00011861549650352069,
"loss": 0.856,
"step": 997
},
{
"epoch": 2.29,
"learning_rate": 0.00011846946470256538,
"loss": 0.725,
"step": 998
},
{
"epoch": 2.29,
"learning_rate": 0.00011832339211252084,
"loss": 0.7615,
"step": 999
},
{
"epoch": 2.29,
"learning_rate": 0.00011817727905598268,
"loss": 0.7691,
"step": 1000
},
{
"epoch": 2.29,
"learning_rate": 0.00011803112585563587,
"loss": 0.8347,
"step": 1001
},
{
"epoch": 2.3,
"learning_rate": 0.00011788493283425397,
"loss": 0.908,
"step": 1002
},
{
"epoch": 2.3,
"learning_rate": 0.00011773870031469862,
"loss": 0.8724,
"step": 1003
},
{
"epoch": 2.3,
"learning_rate": 0.00011759242861991855,
"loss": 0.8801,
"step": 1004
},
{
"epoch": 2.3,
"learning_rate": 0.0001174461180729491,
"loss": 0.861,
"step": 1005
},
{
"epoch": 2.3,
"learning_rate": 0.00011729976899691137,
"loss": 0.8878,
"step": 1006
},
{
"epoch": 2.31,
"learning_rate": 0.00011715338171501156,
"loss": 0.7662,
"step": 1007
},
{
"epoch": 2.31,
"learning_rate": 0.00011700695655054026,
"loss": 0.7814,
"step": 1008
},
{
"epoch": 2.31,
"learning_rate": 0.00011686049382687168,
"loss": 0.8727,
"step": 1009
},
{
"epoch": 2.31,
"learning_rate": 0.000116713993867463,
"loss": 0.8036,
"step": 1010
},
{
"epoch": 2.32,
"learning_rate": 0.00011656745699585371,
"loss": 0.957,
"step": 1011
},
{
"epoch": 2.32,
"learning_rate": 0.00011642088353566469,
"loss": 0.9257,
"step": 1012
},
{
"epoch": 2.32,
"learning_rate": 0.00011627427381059772,
"loss": 0.7994,
"step": 1013
},
{
"epoch": 2.32,
"learning_rate": 0.00011612762814443459,
"loss": 0.6582,
"step": 1014
},
{
"epoch": 2.33,
"learning_rate": 0.00011598094686103653,
"loss": 0.7195,
"step": 1015
},
{
"epoch": 2.33,
"learning_rate": 0.00011583423028434344,
"loss": 0.6673,
"step": 1016
},
{
"epoch": 2.33,
"learning_rate": 0.00011568747873837307,
"loss": 0.8075,
"step": 1017
},
{
"epoch": 2.33,
"learning_rate": 0.00011554069254722051,
"loss": 0.8945,
"step": 1018
},
{
"epoch": 2.33,
"learning_rate": 0.00011539387203505727,
"loss": 0.6828,
"step": 1019
},
{
"epoch": 2.34,
"learning_rate": 0.00011524701752613074,
"loss": 0.7014,
"step": 1020
},
{
"epoch": 2.34,
"learning_rate": 0.00011510012934476338,
"loss": 0.8388,
"step": 1021
},
{
"epoch": 2.34,
"learning_rate": 0.00011495320781535186,
"loss": 0.685,
"step": 1022
},
{
"epoch": 2.34,
"learning_rate": 0.00011480625326236677,
"loss": 0.7141,
"step": 1023
},
{
"epoch": 2.35,
"learning_rate": 0.00011465926601035137,
"loss": 0.8078,
"step": 1024
},
{
"epoch": 2.35,
"learning_rate": 0.00011451224638392129,
"loss": 0.7924,
"step": 1025
},
{
"epoch": 2.35,
"learning_rate": 0.00011436519470776362,
"loss": 0.9223,
"step": 1026
},
{
"epoch": 2.35,
"learning_rate": 0.00011421811130663623,
"loss": 0.8251,
"step": 1027
},
{
"epoch": 2.36,
"learning_rate": 0.00011407099650536706,
"loss": 0.9127,
"step": 1028
},
{
"epoch": 2.36,
"learning_rate": 0.00011392385062885334,
"loss": 0.7634,
"step": 1029
},
{
"epoch": 2.36,
"learning_rate": 0.00011377667400206101,
"loss": 0.7472,
"step": 1030
},
{
"epoch": 2.36,
"learning_rate": 0.00011362946695002383,
"loss": 0.7838,
"step": 1031
},
{
"epoch": 2.36,
"learning_rate": 0.00011348222979784289,
"loss": 0.9502,
"step": 1032
},
{
"epoch": 2.37,
"learning_rate": 0.00011333496287068563,
"loss": 0.7066,
"step": 1033
},
{
"epoch": 2.37,
"learning_rate": 0.00011318766649378532,
"loss": 0.9988,
"step": 1034
},
{
"epoch": 2.37,
"learning_rate": 0.00011304034099244014,
"loss": 0.9448,
"step": 1035
},
{
"epoch": 2.37,
"learning_rate": 0.00011289298669201282,
"loss": 0.7764,
"step": 1036
},
{
"epoch": 2.38,
"learning_rate": 0.00011274560391792948,
"loss": 0.7351,
"step": 1037
},
{
"epoch": 2.38,
"learning_rate": 0.00011259819299567922,
"loss": 0.895,
"step": 1038
},
{
"epoch": 2.38,
"learning_rate": 0.00011245075425081328,
"loss": 0.718,
"step": 1039
},
{
"epoch": 2.38,
"learning_rate": 0.00011230328800894437,
"loss": 0.7811,
"step": 1040
},
{
"epoch": 2.38,
"learning_rate": 0.0001121557945957459,
"loss": 0.7859,
"step": 1041
},
{
"epoch": 2.39,
"learning_rate": 0.00011200827433695127,
"loss": 0.7916,
"step": 1042
},
{
"epoch": 2.39,
"learning_rate": 0.00011186072755835322,
"loss": 0.8321,
"step": 1043
},
{
"epoch": 2.39,
"learning_rate": 0.00011171315458580303,
"loss": 0.7648,
"step": 1044
},
{
"epoch": 2.39,
"learning_rate": 0.00011156555574520981,
"loss": 0.7691,
"step": 1045
},
{
"epoch": 2.4,
"learning_rate": 0.00011141793136253986,
"loss": 0.6978,
"step": 1046
},
{
"epoch": 2.4,
"learning_rate": 0.00011127028176381578,
"loss": 0.6725,
"step": 1047
},
{
"epoch": 2.4,
"learning_rate": 0.00011112260727511596,
"loss": 0.8165,
"step": 1048
},
{
"epoch": 2.4,
"learning_rate": 0.00011097490822257377,
"loss": 0.8662,
"step": 1049
},
{
"epoch": 2.41,
"learning_rate": 0.00011082718493237669,
"loss": 0.8784,
"step": 1050
},
{
"epoch": 2.41,
"learning_rate": 0.00011067943773076586,
"loss": 0.8533,
"step": 1051
},
{
"epoch": 2.41,
"learning_rate": 0.00011053166694403521,
"loss": 0.6602,
"step": 1052
},
{
"epoch": 2.41,
"learning_rate": 0.0001103838728985307,
"loss": 0.8363,
"step": 1053
},
{
"epoch": 2.41,
"learning_rate": 0.0001102360559206497,
"loss": 0.8044,
"step": 1054
},
{
"epoch": 2.42,
"learning_rate": 0.00011008821633684019,
"loss": 0.8684,
"step": 1055
},
{
"epoch": 2.42,
"learning_rate": 0.00010994035447360018,
"loss": 0.7158,
"step": 1056
},
{
"epoch": 2.42,
"learning_rate": 0.0001097924706574767,
"loss": 0.7729,
"step": 1057
},
{
"epoch": 2.42,
"learning_rate": 0.00010964456521506545,
"loss": 0.685,
"step": 1058
},
{
"epoch": 2.43,
"learning_rate": 0.00010949663847300976,
"loss": 0.8647,
"step": 1059
},
{
"epoch": 2.43,
"learning_rate": 0.000109348690758,
"loss": 0.836,
"step": 1060
},
{
"epoch": 2.43,
"learning_rate": 0.00010920072239677301,
"loss": 0.8494,
"step": 1061
},
{
"epoch": 2.43,
"learning_rate": 0.00010905273371611105,
"loss": 0.9494,
"step": 1062
},
{
"epoch": 2.44,
"learning_rate": 0.00010890472504284133,
"loss": 0.7832,
"step": 1063
},
{
"epoch": 2.44,
"learning_rate": 0.00010875669670383521,
"loss": 0.7709,
"step": 1064
},
{
"epoch": 2.44,
"learning_rate": 0.00010860864902600747,
"loss": 0.8175,
"step": 1065
},
{
"epoch": 2.44,
"learning_rate": 0.00010846058233631565,
"loss": 0.8179,
"step": 1066
},
{
"epoch": 2.44,
"learning_rate": 0.00010831249696175918,
"loss": 0.7686,
"step": 1067
},
{
"epoch": 2.45,
"learning_rate": 0.00010816439322937879,
"loss": 0.8491,
"step": 1068
},
{
"epoch": 2.45,
"learning_rate": 0.00010801627146625588,
"loss": 0.7961,
"step": 1069
},
{
"epoch": 2.45,
"learning_rate": 0.00010786813199951145,
"loss": 0.8408,
"step": 1070
},
{
"epoch": 2.45,
"learning_rate": 0.00010771997515630574,
"loss": 0.8916,
"step": 1071
},
{
"epoch": 2.46,
"learning_rate": 0.00010757180126383735,
"loss": 0.8035,
"step": 1072
},
{
"epoch": 2.46,
"learning_rate": 0.0001074236106493425,
"loss": 0.9132,
"step": 1073
},
{
"epoch": 2.46,
"learning_rate": 0.0001072754036400944,
"loss": 0.8029,
"step": 1074
},
{
"epoch": 2.46,
"learning_rate": 0.00010712718056340236,
"loss": 0.6981,
"step": 1075
},
{
"epoch": 2.47,
"learning_rate": 0.00010697894174661127,
"loss": 0.7829,
"step": 1076
},
{
"epoch": 2.47,
"learning_rate": 0.00010683068751710075,
"loss": 0.7699,
"step": 1077
},
{
"epoch": 2.47,
"learning_rate": 0.00010668241820228444,
"loss": 0.7342,
"step": 1078
},
{
"epoch": 2.47,
"learning_rate": 0.00010653413412960935,
"loss": 0.7729,
"step": 1079
},
{
"epoch": 2.47,
"learning_rate": 0.00010638583562655498,
"loss": 0.9097,
"step": 1080
},
{
"epoch": 2.48,
"learning_rate": 0.00010623752302063283,
"loss": 0.8692,
"step": 1081
},
{
"epoch": 2.48,
"learning_rate": 0.00010608919663938549,
"loss": 0.8861,
"step": 1082
},
{
"epoch": 2.48,
"learning_rate": 0.00010594085681038588,
"loss": 0.7454,
"step": 1083
},
{
"epoch": 2.48,
"learning_rate": 0.00010579250386123676,
"loss": 0.8291,
"step": 1084
},
{
"epoch": 2.49,
"learning_rate": 0.0001056441381195698,
"loss": 0.7643,
"step": 1085
},
{
"epoch": 2.49,
"learning_rate": 0.00010549575991304492,
"loss": 0.8242,
"step": 1086
},
{
"epoch": 2.49,
"learning_rate": 0.0001053473695693496,
"loss": 0.9521,
"step": 1087
},
{
"epoch": 2.49,
"learning_rate": 0.00010519896741619803,
"loss": 0.8142,
"step": 1088
},
{
"epoch": 2.49,
"learning_rate": 0.00010505055378133067,
"loss": 0.7955,
"step": 1089
},
{
"epoch": 2.5,
"learning_rate": 0.00010490212899251309,
"loss": 0.7363,
"step": 1090
},
{
"epoch": 2.5,
"learning_rate": 0.00010475369337753569,
"loss": 0.8173,
"step": 1091
},
{
"epoch": 2.5,
"learning_rate": 0.00010460524726421275,
"loss": 0.7659,
"step": 1092
},
{
"epoch": 2.5,
"learning_rate": 0.00010445679098038157,
"loss": 0.8618,
"step": 1093
},
{
"epoch": 2.51,
"learning_rate": 0.00010430832485390217,
"loss": 0.7606,
"step": 1094
},
{
"epoch": 2.51,
"learning_rate": 0.00010415984921265609,
"loss": 0.8721,
"step": 1095
},
{
"epoch": 2.51,
"learning_rate": 0.00010401136438454599,
"loss": 0.8152,
"step": 1096
},
{
"epoch": 2.51,
"learning_rate": 0.0001038628706974948,
"loss": 0.8934,
"step": 1097
},
{
"epoch": 2.52,
"learning_rate": 0.00010371436847944503,
"loss": 0.8385,
"step": 1098
},
{
"epoch": 2.52,
"learning_rate": 0.00010356585805835797,
"loss": 0.8581,
"step": 1099
},
{
"epoch": 2.52,
"learning_rate": 0.00010341733976221313,
"loss": 0.788,
"step": 1100
},
{
"epoch": 2.52,
"learning_rate": 0.00010326881391900724,
"loss": 0.7872,
"step": 1101
},
{
"epoch": 2.52,
"learning_rate": 0.00010312028085675391,
"loss": 0.819,
"step": 1102
},
{
"epoch": 2.53,
"learning_rate": 0.00010297174090348255,
"loss": 0.854,
"step": 1103
},
{
"epoch": 2.53,
"learning_rate": 0.00010282319438723782,
"loss": 0.7121,
"step": 1104
},
{
"epoch": 2.53,
"learning_rate": 0.00010267464163607889,
"loss": 0.8977,
"step": 1105
},
{
"epoch": 2.53,
"learning_rate": 0.00010252608297807871,
"loss": 0.8411,
"step": 1106
},
{
"epoch": 2.54,
"learning_rate": 0.00010237751874132322,
"loss": 0.834,
"step": 1107
},
{
"epoch": 2.54,
"learning_rate": 0.00010222894925391073,
"loss": 0.7582,
"step": 1108
},
{
"epoch": 2.54,
"learning_rate": 0.00010208037484395114,
"loss": 0.7773,
"step": 1109
},
{
"epoch": 2.54,
"learning_rate": 0.00010193179583956523,
"loss": 0.7294,
"step": 1110
},
{
"epoch": 2.55,
"learning_rate": 0.00010178321256888385,
"loss": 0.89,
"step": 1111
},
{
"epoch": 2.55,
"learning_rate": 0.00010163462536004742,
"loss": 0.7675,
"step": 1112
},
{
"epoch": 2.55,
"learning_rate": 0.00010148603454120487,
"loss": 0.7291,
"step": 1113
},
{
"epoch": 2.55,
"learning_rate": 0.00010133744044051328,
"loss": 0.8403,
"step": 1114
},
{
"epoch": 2.55,
"learning_rate": 0.00010118884338613688,
"loss": 0.8955,
"step": 1115
},
{
"epoch": 2.56,
"learning_rate": 0.00010104024370624644,
"loss": 0.7537,
"step": 1116
},
{
"epoch": 2.56,
"learning_rate": 0.00010089164172901851,
"loss": 0.8734,
"step": 1117
},
{
"epoch": 2.56,
"learning_rate": 0.00010074303778263474,
"loss": 0.7312,
"step": 1118
},
{
"epoch": 2.56,
"learning_rate": 0.00010059443219528117,
"loss": 0.7906,
"step": 1119
},
{
"epoch": 2.57,
"learning_rate": 0.00010044582529514739,
"loss": 0.7756,
"step": 1120
},
{
"epoch": 2.57,
"learning_rate": 0.00010029721741042586,
"loss": 0.9158,
"step": 1121
},
{
"epoch": 2.57,
"learning_rate": 0.00010014860886931139,
"loss": 0.8481,
"step": 1122
},
{
"epoch": 2.57,
"learning_rate": 0.0001,
"loss": 0.8187,
"step": 1123
},
{
"epoch": 2.58,
"learning_rate": 9.985139113068865e-05,
"loss": 0.8507,
"step": 1124
},
{
"epoch": 2.58,
"learning_rate": 9.970278258957415e-05,
"loss": 0.7585,
"step": 1125
},
{
"epoch": 2.58,
"learning_rate": 9.955417470485265e-05,
"loss": 0.7163,
"step": 1126
},
{
"epoch": 2.58,
"learning_rate": 9.940556780471885e-05,
"loss": 0.8124,
"step": 1127
},
{
"epoch": 2.58,
"learning_rate": 9.925696221736525e-05,
"loss": 0.924,
"step": 1128
},
{
"epoch": 2.59,
"learning_rate": 9.91083582709815e-05,
"loss": 0.843,
"step": 1129
},
{
"epoch": 2.59,
"learning_rate": 9.895975629375359e-05,
"loss": 0.8461,
"step": 1130
},
{
"epoch": 2.59,
"learning_rate": 9.881115661386314e-05,
"loss": 0.757,
"step": 1131
},
{
"epoch": 2.59,
"learning_rate": 9.866255955948676e-05,
"loss": 0.7779,
"step": 1132
},
{
"epoch": 2.6,
"learning_rate": 9.851396545879516e-05,
"loss": 0.8325,
"step": 1133
},
{
"epoch": 2.6,
"learning_rate": 9.836537463995262e-05,
"loss": 0.7117,
"step": 1134
},
{
"epoch": 2.6,
"learning_rate": 9.821678743111618e-05,
"loss": 0.7209,
"step": 1135
},
{
"epoch": 2.6,
"learning_rate": 9.806820416043478e-05,
"loss": 0.6621,
"step": 1136
},
{
"epoch": 2.6,
"learning_rate": 9.791962515604887e-05,
"loss": 0.7836,
"step": 1137
},
{
"epoch": 2.61,
"learning_rate": 9.777105074608928e-05,
"loss": 0.8576,
"step": 1138
},
{
"epoch": 2.61,
"learning_rate": 9.762248125867678e-05,
"loss": 0.6352,
"step": 1139
},
{
"epoch": 2.61,
"learning_rate": 9.747391702192132e-05,
"loss": 0.7828,
"step": 1140
},
{
"epoch": 2.61,
"learning_rate": 9.732535836392113e-05,
"loss": 0.6583,
"step": 1141
},
{
"epoch": 2.62,
"learning_rate": 9.717680561276219e-05,
"loss": 0.9171,
"step": 1142
},
{
"epoch": 2.62,
"learning_rate": 9.702825909651748e-05,
"loss": 0.8694,
"step": 1143
},
{
"epoch": 2.62,
"learning_rate": 9.687971914324607e-05,
"loss": 0.9293,
"step": 1144
},
{
"epoch": 2.62,
"learning_rate": 9.673118608099276e-05,
"loss": 0.7273,
"step": 1145
},
{
"epoch": 2.63,
"learning_rate": 9.658266023778689e-05,
"loss": 0.8386,
"step": 1146
},
{
"epoch": 2.63,
"learning_rate": 9.643414194164204e-05,
"loss": 0.727,
"step": 1147
},
{
"epoch": 2.63,
"learning_rate": 9.628563152055498e-05,
"loss": 0.9991,
"step": 1148
},
{
"epoch": 2.63,
"learning_rate": 9.61371293025052e-05,
"loss": 0.7304,
"step": 1149
},
{
"epoch": 2.63,
"learning_rate": 9.598863561545404e-05,
"loss": 0.8146,
"step": 1150
},
{
"epoch": 2.64,
"learning_rate": 9.584015078734395e-05,
"loss": 0.8178,
"step": 1151
},
{
"epoch": 2.64,
"learning_rate": 9.569167514609786e-05,
"loss": 0.7202,
"step": 1152
},
{
"epoch": 2.64,
"learning_rate": 9.554320901961843e-05,
"loss": 0.728,
"step": 1153
},
{
"epoch": 2.64,
"learning_rate": 9.539475273578729e-05,
"loss": 0.7842,
"step": 1154
},
{
"epoch": 2.65,
"learning_rate": 9.524630662246432e-05,
"loss": 0.7706,
"step": 1155
},
{
"epoch": 2.65,
"learning_rate": 9.509787100748692e-05,
"loss": 0.802,
"step": 1156
},
{
"epoch": 2.65,
"learning_rate": 9.494944621866937e-05,
"loss": 0.9293,
"step": 1157
},
{
"epoch": 2.65,
"learning_rate": 9.480103258380198e-05,
"loss": 0.8051,
"step": 1158
},
{
"epoch": 2.66,
"learning_rate": 9.465263043065045e-05,
"loss": 0.7449,
"step": 1159
},
{
"epoch": 2.66,
"learning_rate": 9.450424008695509e-05,
"loss": 0.7289,
"step": 1160
},
{
"epoch": 2.66,
"learning_rate": 9.43558618804302e-05,
"loss": 0.6778,
"step": 1161
},
{
"epoch": 2.66,
"learning_rate": 9.420749613876325e-05,
"loss": 0.7731,
"step": 1162
},
{
"epoch": 2.66,
"learning_rate": 9.405914318961414e-05,
"loss": 0.6934,
"step": 1163
},
{
"epoch": 2.67,
"learning_rate": 9.391080336061454e-05,
"loss": 0.9045,
"step": 1164
},
{
"epoch": 2.67,
"learning_rate": 9.376247697936719e-05,
"loss": 0.8016,
"step": 1165
},
{
"epoch": 2.67,
"learning_rate": 9.361416437344503e-05,
"loss": 0.6214,
"step": 1166
},
{
"epoch": 2.67,
"learning_rate": 9.34658658703907e-05,
"loss": 0.6771,
"step": 1167
},
{
"epoch": 2.68,
"learning_rate": 9.331758179771561e-05,
"loss": 0.748,
"step": 1168
},
{
"epoch": 2.68,
"learning_rate": 9.316931248289926e-05,
"loss": 0.665,
"step": 1169
},
{
"epoch": 2.68,
"learning_rate": 9.302105825338876e-05,
"loss": 0.901,
"step": 1170
},
{
"epoch": 2.68,
"learning_rate": 9.287281943659767e-05,
"loss": 0.8342,
"step": 1171
},
{
"epoch": 2.68,
"learning_rate": 9.272459635990562e-05,
"loss": 0.853,
"step": 1172
},
{
"epoch": 2.69,
"learning_rate": 9.257638935065753e-05,
"loss": 0.8093,
"step": 1173
},
{
"epoch": 2.69,
"learning_rate": 9.242819873616268e-05,
"loss": 0.8451,
"step": 1174
},
{
"epoch": 2.69,
"learning_rate": 9.228002484369429e-05,
"loss": 0.8628,
"step": 1175
},
{
"epoch": 2.69,
"learning_rate": 9.213186800048861e-05,
"loss": 0.7858,
"step": 1176
},
{
"epoch": 2.7,
"learning_rate": 9.198372853374415e-05,
"loss": 0.9236,
"step": 1177
},
{
"epoch": 2.7,
"learning_rate": 9.183560677062119e-05,
"loss": 0.7925,
"step": 1178
},
{
"epoch": 2.7,
"learning_rate": 9.168750303824084e-05,
"loss": 0.7105,
"step": 1179
},
{
"epoch": 2.7,
"learning_rate": 9.153941766368439e-05,
"loss": 0.7521,
"step": 1180
},
{
"epoch": 2.71,
"learning_rate": 9.139135097399254e-05,
"loss": 0.8648,
"step": 1181
},
{
"epoch": 2.71,
"learning_rate": 9.124330329616482e-05,
"loss": 0.8409,
"step": 1182
},
{
"epoch": 2.71,
"learning_rate": 9.109527495715872e-05,
"loss": 0.7198,
"step": 1183
},
{
"epoch": 2.71,
"learning_rate": 9.094726628388899e-05,
"loss": 0.7365,
"step": 1184
},
{
"epoch": 2.71,
"learning_rate": 9.0799277603227e-05,
"loss": 0.7699,
"step": 1185
},
{
"epoch": 2.72,
"learning_rate": 9.065130924199998e-05,
"loss": 0.8041,
"step": 1186
},
{
"epoch": 2.72,
"learning_rate": 9.050336152699025e-05,
"loss": 0.8308,
"step": 1187
},
{
"epoch": 2.72,
"learning_rate": 9.035543478493458e-05,
"loss": 0.8139,
"step": 1188
},
{
"epoch": 2.72,
"learning_rate": 9.02075293425233e-05,
"loss": 0.7394,
"step": 1189
},
{
"epoch": 2.73,
"learning_rate": 9.005964552639984e-05,
"loss": 0.6738,
"step": 1190
},
{
"epoch": 2.73,
"learning_rate": 8.991178366315982e-05,
"loss": 0.9421,
"step": 1191
},
{
"epoch": 2.73,
"learning_rate": 8.976394407935034e-05,
"loss": 0.8747,
"step": 1192
},
{
"epoch": 2.73,
"learning_rate": 8.961612710146934e-05,
"loss": 0.8282,
"step": 1193
},
{
"epoch": 2.74,
"learning_rate": 8.94683330559648e-05,
"loss": 0.765,
"step": 1194
},
{
"epoch": 2.74,
"learning_rate": 8.932056226923416e-05,
"loss": 0.8515,
"step": 1195
},
{
"epoch": 2.74,
"learning_rate": 8.917281506762335e-05,
"loss": 0.6194,
"step": 1196
},
{
"epoch": 2.74,
"learning_rate": 8.902509177742626e-05,
"loss": 0.8852,
"step": 1197
},
{
"epoch": 2.74,
"learning_rate": 8.887739272488406e-05,
"loss": 0.7481,
"step": 1198
},
{
"epoch": 2.75,
"learning_rate": 8.872971823618424e-05,
"loss": 0.7979,
"step": 1199
},
{
"epoch": 2.75,
"learning_rate": 8.858206863746018e-05,
"loss": 0.8332,
"step": 1200
},
{
"epoch": 2.75,
"learning_rate": 8.843444425479022e-05,
"loss": 0.6716,
"step": 1201
},
{
"epoch": 2.75,
"learning_rate": 8.828684541419696e-05,
"loss": 0.9192,
"step": 1202
},
{
"epoch": 2.76,
"learning_rate": 8.813927244164679e-05,
"loss": 0.8463,
"step": 1203
},
{
"epoch": 2.76,
"learning_rate": 8.799172566304874e-05,
"loss": 0.6598,
"step": 1204
},
{
"epoch": 2.76,
"learning_rate": 8.784420540425412e-05,
"loss": 0.7823,
"step": 1205
},
{
"epoch": 2.76,
"learning_rate": 8.769671199105565e-05,
"loss": 0.8728,
"step": 1206
},
{
"epoch": 2.77,
"learning_rate": 8.754924574918675e-05,
"loss": 0.7665,
"step": 1207
},
{
"epoch": 2.77,
"learning_rate": 8.74018070043208e-05,
"loss": 0.8008,
"step": 1208
},
{
"epoch": 2.77,
"learning_rate": 8.725439608207056e-05,
"loss": 0.6833,
"step": 1209
},
{
"epoch": 2.77,
"learning_rate": 8.710701330798719e-05,
"loss": 0.7801,
"step": 1210
},
{
"epoch": 2.77,
"learning_rate": 8.695965900755985e-05,
"loss": 0.6308,
"step": 1211
},
{
"epoch": 2.78,
"learning_rate": 8.68123335062147e-05,
"loss": 0.7851,
"step": 1212
},
{
"epoch": 2.78,
"learning_rate": 8.666503712931439e-05,
"loss": 0.7592,
"step": 1213
},
{
"epoch": 2.78,
"learning_rate": 8.651777020215712e-05,
"loss": 0.8727,
"step": 1214
},
{
"epoch": 2.78,
"learning_rate": 8.637053304997618e-05,
"loss": 0.903,
"step": 1215
},
{
"epoch": 2.79,
"learning_rate": 8.622332599793906e-05,
"loss": 0.8076,
"step": 1216
},
{
"epoch": 2.79,
"learning_rate": 8.607614937114671e-05,
"loss": 0.8975,
"step": 1217
},
{
"epoch": 2.79,
"learning_rate": 8.592900349463297e-05,
"loss": 0.8249,
"step": 1218
},
{
"epoch": 2.79,
"learning_rate": 8.578188869336377e-05,
"loss": 0.8529,
"step": 1219
},
{
"epoch": 2.79,
"learning_rate": 8.563480529223638e-05,
"loss": 0.8351,
"step": 1220
},
{
"epoch": 2.8,
"learning_rate": 8.548775361607872e-05,
"loss": 0.8934,
"step": 1221
},
{
"epoch": 2.8,
"learning_rate": 8.534073398964866e-05,
"loss": 0.8067,
"step": 1222
},
{
"epoch": 2.8,
"learning_rate": 8.519374673763326e-05,
"loss": 0.8508,
"step": 1223
},
{
"epoch": 2.8,
"learning_rate": 8.504679218464816e-05,
"loss": 0.7419,
"step": 1224
},
{
"epoch": 2.81,
"learning_rate": 8.489987065523668e-05,
"loss": 0.7808,
"step": 1225
},
{
"epoch": 2.81,
"learning_rate": 8.475298247386927e-05,
"loss": 0.8603,
"step": 1226
},
{
"epoch": 2.81,
"learning_rate": 8.460612796494272e-05,
"loss": 0.8818,
"step": 1227
},
{
"epoch": 2.81,
"learning_rate": 8.445930745277953e-05,
"loss": 0.779,
"step": 1228
},
{
"epoch": 2.82,
"learning_rate": 8.431252126162695e-05,
"loss": 0.766,
"step": 1229
},
{
"epoch": 2.82,
"learning_rate": 8.41657697156566e-05,
"loss": 0.8743,
"step": 1230
},
{
"epoch": 2.82,
"learning_rate": 8.40190531389635e-05,
"loss": 0.882,
"step": 1231
},
{
"epoch": 2.82,
"learning_rate": 8.387237185556545e-05,
"loss": 0.7422,
"step": 1232
},
{
"epoch": 2.82,
"learning_rate": 8.372572618940231e-05,
"loss": 0.9271,
"step": 1233
},
{
"epoch": 2.83,
"learning_rate": 8.357911646433535e-05,
"loss": 0.8051,
"step": 1234
},
{
"epoch": 2.83,
"learning_rate": 8.343254300414628e-05,
"loss": 0.782,
"step": 1235
},
{
"epoch": 2.83,
"learning_rate": 8.3286006132537e-05,
"loss": 0.8754,
"step": 1236
},
{
"epoch": 2.83,
"learning_rate": 8.313950617312835e-05,
"loss": 0.8249,
"step": 1237
},
{
"epoch": 2.84,
"learning_rate": 8.299304344945977e-05,
"loss": 0.8342,
"step": 1238
},
{
"epoch": 2.84,
"learning_rate": 8.284661828498847e-05,
"loss": 0.8593,
"step": 1239
},
{
"epoch": 2.84,
"learning_rate": 8.270023100308865e-05,
"loss": 0.7507,
"step": 1240
},
{
"epoch": 2.84,
"learning_rate": 8.255388192705093e-05,
"loss": 0.8462,
"step": 1241
},
{
"epoch": 2.85,
"learning_rate": 8.240757138008149e-05,
"loss": 0.8322,
"step": 1242
},
{
"epoch": 2.85,
"learning_rate": 8.22612996853014e-05,
"loss": 0.8963,
"step": 1243
},
{
"epoch": 2.85,
"learning_rate": 8.211506716574602e-05,
"loss": 0.7419,
"step": 1244
},
{
"epoch": 2.85,
"learning_rate": 8.196887414436416e-05,
"loss": 0.8225,
"step": 1245
},
{
"epoch": 2.85,
"learning_rate": 8.182272094401735e-05,
"loss": 0.8539,
"step": 1246
},
{
"epoch": 2.86,
"learning_rate": 8.167660788747919e-05,
"loss": 0.7852,
"step": 1247
},
{
"epoch": 2.86,
"learning_rate": 8.153053529743465e-05,
"loss": 0.9128,
"step": 1248
},
{
"epoch": 2.86,
"learning_rate": 8.138450349647936e-05,
"loss": 0.7328,
"step": 1249
},
{
"epoch": 2.86,
"learning_rate": 8.123851280711877e-05,
"loss": 0.8816,
"step": 1250
},
{
"epoch": 2.87,
"learning_rate": 8.10925635517676e-05,
"loss": 0.7267,
"step": 1251
},
{
"epoch": 2.87,
"learning_rate": 8.094665605274913e-05,
"loss": 0.7362,
"step": 1252
},
{
"epoch": 2.87,
"learning_rate": 8.080079063229432e-05,
"loss": 0.7475,
"step": 1253
},
{
"epoch": 2.87,
"learning_rate": 8.065496761254126e-05,
"loss": 0.7727,
"step": 1254
},
{
"epoch": 2.88,
"learning_rate": 8.050918731553431e-05,
"loss": 0.746,
"step": 1255
},
{
"epoch": 2.88,
"learning_rate": 8.036345006322359e-05,
"loss": 0.8132,
"step": 1256
},
{
"epoch": 2.88,
"learning_rate": 8.021775617746412e-05,
"loss": 0.6752,
"step": 1257
},
{
"epoch": 2.88,
"learning_rate": 8.007210598001512e-05,
"loss": 0.7468,
"step": 1258
},
{
"epoch": 2.88,
"learning_rate": 7.992649979253934e-05,
"loss": 0.9141,
"step": 1259
},
{
"epoch": 2.89,
"learning_rate": 7.978093793660233e-05,
"loss": 0.7706,
"step": 1260
},
{
"epoch": 2.89,
"learning_rate": 7.963542073367181e-05,
"loss": 0.8399,
"step": 1261
},
{
"epoch": 2.89,
"learning_rate": 7.948994850511677e-05,
"loss": 0.834,
"step": 1262
},
{
"epoch": 2.89,
"learning_rate": 7.934452157220694e-05,
"loss": 0.767,
"step": 1263
},
{
"epoch": 2.9,
"learning_rate": 7.9199140256112e-05,
"loss": 0.75,
"step": 1264
},
{
"epoch": 2.9,
"learning_rate": 7.905380487790088e-05,
"loss": 0.81,
"step": 1265
},
{
"epoch": 2.9,
"learning_rate": 7.890851575854108e-05,
"loss": 0.8931,
"step": 1266
},
{
"epoch": 2.9,
"learning_rate": 7.876327321889795e-05,
"loss": 0.8929,
"step": 1267
},
{
"epoch": 2.9,
"learning_rate": 7.861807757973387e-05,
"loss": 0.787,
"step": 1268
},
{
"epoch": 2.91,
"learning_rate": 7.847292916170784e-05,
"loss": 0.8072,
"step": 1269
},
{
"epoch": 2.91,
"learning_rate": 7.832782828537437e-05,
"loss": 0.8121,
"step": 1270
},
{
"epoch": 2.91,
"learning_rate": 7.818277527118307e-05,
"loss": 0.7951,
"step": 1271
},
{
"epoch": 2.91,
"learning_rate": 7.803777043947789e-05,
"loss": 0.7093,
"step": 1272
},
{
"epoch": 2.92,
"learning_rate": 7.789281411049625e-05,
"loss": 0.7827,
"step": 1273
},
{
"epoch": 2.92,
"learning_rate": 7.774790660436858e-05,
"loss": 0.7433,
"step": 1274
},
{
"epoch": 2.92,
"learning_rate": 7.760304824111741e-05,
"loss": 0.7359,
"step": 1275
},
{
"epoch": 2.92,
"learning_rate": 7.745823934065671e-05,
"loss": 0.7157,
"step": 1276
},
{
"epoch": 2.93,
"learning_rate": 7.731348022279134e-05,
"loss": 0.961,
"step": 1277
},
{
"epoch": 2.93,
"learning_rate": 7.716877120721611e-05,
"loss": 0.7718,
"step": 1278
},
{
"epoch": 2.93,
"learning_rate": 7.702411261351523e-05,
"loss": 0.835,
"step": 1279
},
{
"epoch": 2.93,
"learning_rate": 7.68795047611615e-05,
"loss": 0.9129,
"step": 1280
},
{
"epoch": 2.93,
"learning_rate": 7.673494796951573e-05,
"loss": 0.7635,
"step": 1281
},
{
"epoch": 2.94,
"learning_rate": 7.659044255782593e-05,
"loss": 0.6873,
"step": 1282
},
{
"epoch": 2.94,
"learning_rate": 7.644598884522659e-05,
"loss": 0.6434,
"step": 1283
},
{
"epoch": 2.94,
"learning_rate": 7.630158715073813e-05,
"loss": 0.8408,
"step": 1284
},
{
"epoch": 2.94,
"learning_rate": 7.615723779326599e-05,
"loss": 0.9042,
"step": 1285
},
{
"epoch": 2.95,
"learning_rate": 7.601294109160012e-05,
"loss": 0.7996,
"step": 1286
},
{
"epoch": 2.95,
"learning_rate": 7.586869736441413e-05,
"loss": 0.923,
"step": 1287
},
{
"epoch": 2.95,
"learning_rate": 7.572450693026462e-05,
"loss": 0.7661,
"step": 1288
},
{
"epoch": 2.95,
"learning_rate": 7.55803701075905e-05,
"loss": 0.9105,
"step": 1289
},
{
"epoch": 2.96,
"learning_rate": 7.543628721471233e-05,
"loss": 0.8071,
"step": 1290
},
{
"epoch": 2.96,
"learning_rate": 7.52922585698315e-05,
"loss": 0.8234,
"step": 1291
},
{
"epoch": 2.96,
"learning_rate": 7.514828449102966e-05,
"loss": 0.8131,
"step": 1292
},
{
"epoch": 2.96,
"learning_rate": 7.500436529626786e-05,
"loss": 0.8149,
"step": 1293
},
{
"epoch": 2.96,
"learning_rate": 7.486050130338612e-05,
"loss": 0.8441,
"step": 1294
},
{
"epoch": 2.97,
"learning_rate": 7.471669283010232e-05,
"loss": 0.8269,
"step": 1295
},
{
"epoch": 2.97,
"learning_rate": 7.457294019401191e-05,
"loss": 0.632,
"step": 1296
},
{
"epoch": 2.97,
"learning_rate": 7.442924371258694e-05,
"loss": 0.8522,
"step": 1297
},
{
"epoch": 2.97,
"learning_rate": 7.428560370317542e-05,
"loss": 0.8387,
"step": 1298
},
{
"epoch": 2.98,
"learning_rate": 7.414202048300072e-05,
"loss": 0.887,
"step": 1299
},
{
"epoch": 2.98,
"learning_rate": 7.399849436916077e-05,
"loss": 0.8273,
"step": 1300
},
{
"epoch": 2.98,
"learning_rate": 7.385502567862728e-05,
"loss": 0.7807,
"step": 1301
},
{
"epoch": 2.98,
"learning_rate": 7.371161472824536e-05,
"loss": 0.9077,
"step": 1302
},
{
"epoch": 2.99,
"learning_rate": 7.35682618347324e-05,
"loss": 0.8779,
"step": 1303
},
{
"epoch": 2.99,
"learning_rate": 7.342496731467767e-05,
"loss": 0.8595,
"step": 1304
},
{
"epoch": 2.99,
"learning_rate": 7.328173148454151e-05,
"loss": 0.8391,
"step": 1305
},
{
"epoch": 2.99,
"learning_rate": 7.31385546606546e-05,
"loss": 0.7559,
"step": 1306
},
{
"epoch": 2.99,
"learning_rate": 7.29954371592174e-05,
"loss": 0.8926,
"step": 1307
},
{
"epoch": 3.0,
"learning_rate": 7.285237929629928e-05,
"loss": 0.8443,
"step": 1308
},
{
"epoch": 3.0,
"learning_rate": 7.27093813878379e-05,
"loss": 0.7854,
"step": 1309
},
{
"epoch": 3.0,
"learning_rate": 7.256644374963857e-05,
"loss": 0.9361,
"step": 1310
},
{
"epoch": 3.0,
"learning_rate": 7.242356669737344e-05,
"loss": 0.7515,
"step": 1311
},
{
"epoch": 3.01,
"learning_rate": 7.228075054658096e-05,
"loss": 0.5228,
"step": 1312
},
{
"epoch": 3.01,
"learning_rate": 7.213799561266489e-05,
"loss": 0.8614,
"step": 1313
},
{
"epoch": 3.01,
"learning_rate": 7.199530221089398e-05,
"loss": 0.6461,
"step": 1314
},
{
"epoch": 3.01,
"learning_rate": 7.185267065640104e-05,
"loss": 0.6926,
"step": 1315
},
{
"epoch": 3.01,
"learning_rate": 7.171010126418218e-05,
"loss": 0.8601,
"step": 1316
},
{
"epoch": 3.02,
"learning_rate": 7.156759434909639e-05,
"loss": 0.784,
"step": 1317
},
{
"epoch": 3.02,
"learning_rate": 7.142515022586456e-05,
"loss": 1.0793,
"step": 1318
},
{
"epoch": 3.02,
"learning_rate": 7.1282769209069e-05,
"loss": 0.71,
"step": 1319
},
{
"epoch": 3.02,
"learning_rate": 7.114045161315261e-05,
"loss": 0.7129,
"step": 1320
},
{
"epoch": 3.03,
"learning_rate": 7.099819775241819e-05,
"loss": 0.6223,
"step": 1321
},
{
"epoch": 3.03,
"learning_rate": 7.085600794102783e-05,
"loss": 0.643,
"step": 1322
},
{
"epoch": 3.03,
"learning_rate": 7.071388249300218e-05,
"loss": 0.7678,
"step": 1323
},
{
"epoch": 3.03,
"learning_rate": 7.057182172221967e-05,
"loss": 0.6995,
"step": 1324
},
{
"epoch": 3.04,
"learning_rate": 7.042982594241601e-05,
"loss": 0.6812,
"step": 1325
},
{
"epoch": 3.04,
"learning_rate": 7.028789546718326e-05,
"loss": 0.7234,
"step": 1326
},
{
"epoch": 3.04,
"learning_rate": 7.014603060996938e-05,
"loss": 0.8338,
"step": 1327
},
{
"epoch": 3.04,
"learning_rate": 7.00042316840773e-05,
"loss": 0.9738,
"step": 1328
},
{
"epoch": 3.04,
"learning_rate": 6.98624990026644e-05,
"loss": 0.6211,
"step": 1329
},
{
"epoch": 3.05,
"learning_rate": 6.972083287874177e-05,
"loss": 0.7343,
"step": 1330
},
{
"epoch": 3.05,
"learning_rate": 6.957923362517348e-05,
"loss": 0.7291,
"step": 1331
},
{
"epoch": 3.05,
"learning_rate": 6.943770155467593e-05,
"loss": 0.7687,
"step": 1332
},
{
"epoch": 3.05,
"learning_rate": 6.929623697981718e-05,
"loss": 0.7509,
"step": 1333
},
{
"epoch": 3.06,
"learning_rate": 6.915484021301613e-05,
"loss": 0.769,
"step": 1334
},
{
"epoch": 3.06,
"learning_rate": 6.90135115665421e-05,
"loss": 0.7605,
"step": 1335
},
{
"epoch": 3.06,
"learning_rate": 6.887225135251381e-05,
"loss": 0.7519,
"step": 1336
},
{
"epoch": 3.06,
"learning_rate": 6.873105988289892e-05,
"loss": 0.7648,
"step": 1337
},
{
"epoch": 3.07,
"learning_rate": 6.858993746951328e-05,
"loss": 0.8969,
"step": 1338
},
{
"epoch": 3.07,
"learning_rate": 6.844888442402018e-05,
"loss": 0.7229,
"step": 1339
},
{
"epoch": 3.07,
"learning_rate": 6.830790105792973e-05,
"loss": 0.6294,
"step": 1340
},
{
"epoch": 3.07,
"learning_rate": 6.816698768259824e-05,
"loss": 0.7872,
"step": 1341
},
{
"epoch": 3.07,
"learning_rate": 6.802614460922728e-05,
"loss": 0.7555,
"step": 1342
},
{
"epoch": 3.08,
"learning_rate": 6.788537214886335e-05,
"loss": 0.7431,
"step": 1343
},
{
"epoch": 3.08,
"learning_rate": 6.774467061239687e-05,
"loss": 0.7502,
"step": 1344
},
{
"epoch": 3.08,
"learning_rate": 6.760404031056169e-05,
"loss": 0.9202,
"step": 1345
},
{
"epoch": 3.08,
"learning_rate": 6.74634815539343e-05,
"loss": 0.8221,
"step": 1346
},
{
"epoch": 3.09,
"learning_rate": 6.732299465293322e-05,
"loss": 0.8935,
"step": 1347
},
{
"epoch": 3.09,
"learning_rate": 6.718257991781828e-05,
"loss": 0.6869,
"step": 1348
},
{
"epoch": 3.09,
"learning_rate": 6.704223765868991e-05,
"loss": 0.6931,
"step": 1349
},
{
"epoch": 3.09,
"learning_rate": 6.690196818548846e-05,
"loss": 0.7308,
"step": 1350
},
{
"epoch": 3.1,
"learning_rate": 6.67617718079936e-05,
"loss": 0.779,
"step": 1351
},
{
"epoch": 3.1,
"learning_rate": 6.662164883582354e-05,
"loss": 0.7807,
"step": 1352
},
{
"epoch": 3.1,
"learning_rate": 6.648159957843438e-05,
"loss": 0.7942,
"step": 1353
},
{
"epoch": 3.1,
"learning_rate": 6.63416243451194e-05,
"loss": 0.842,
"step": 1354
},
{
"epoch": 3.1,
"learning_rate": 6.62017234450084e-05,
"loss": 0.9713,
"step": 1355
},
{
"epoch": 3.11,
"learning_rate": 6.60618971870671e-05,
"loss": 0.5946,
"step": 1356
},
{
"epoch": 3.11,
"learning_rate": 6.592214588009625e-05,
"loss": 0.656,
"step": 1357
},
{
"epoch": 3.11,
"learning_rate": 6.578246983273118e-05,
"loss": 0.7192,
"step": 1358
},
{
"epoch": 3.11,
"learning_rate": 6.564286935344089e-05,
"loss": 0.7485,
"step": 1359
},
{
"epoch": 3.12,
"learning_rate": 6.550334475052767e-05,
"loss": 0.8379,
"step": 1360
},
{
"epoch": 3.12,
"learning_rate": 6.536389633212609e-05,
"loss": 0.9204,
"step": 1361
},
{
"epoch": 3.12,
"learning_rate": 6.522452440620254e-05,
"loss": 0.7924,
"step": 1362
},
{
"epoch": 3.12,
"learning_rate": 6.508522928055445e-05,
"loss": 0.7988,
"step": 1363
},
{
"epoch": 3.12,
"learning_rate": 6.494601126280963e-05,
"loss": 0.7678,
"step": 1364
},
{
"epoch": 3.13,
"learning_rate": 6.480687066042561e-05,
"loss": 0.7079,
"step": 1365
},
{
"epoch": 3.13,
"learning_rate": 6.466780778068903e-05,
"loss": 0.7104,
"step": 1366
},
{
"epoch": 3.13,
"learning_rate": 6.452882293071468e-05,
"loss": 0.7226,
"step": 1367
},
{
"epoch": 3.13,
"learning_rate": 6.43899164174453e-05,
"loss": 0.8358,
"step": 1368
},
{
"epoch": 3.14,
"learning_rate": 6.42510885476504e-05,
"loss": 0.6752,
"step": 1369
},
{
"epoch": 3.14,
"learning_rate": 6.411233962792593e-05,
"loss": 0.7962,
"step": 1370
},
{
"epoch": 3.14,
"learning_rate": 6.397366996469343e-05,
"loss": 0.8052,
"step": 1371
},
{
"epoch": 3.14,
"learning_rate": 6.383507986419939e-05,
"loss": 0.9013,
"step": 1372
},
{
"epoch": 3.15,
"learning_rate": 6.369656963251467e-05,
"loss": 0.798,
"step": 1373
},
{
"epoch": 3.15,
"learning_rate": 6.355813957553364e-05,
"loss": 0.7121,
"step": 1374
},
{
"epoch": 3.15,
"learning_rate": 6.341978999897365e-05,
"loss": 0.7275,
"step": 1375
},
{
"epoch": 3.15,
"learning_rate": 6.328152120837439e-05,
"loss": 0.7393,
"step": 1376
},
{
"epoch": 3.15,
"learning_rate": 6.314333350909701e-05,
"loss": 0.9145,
"step": 1377
},
{
"epoch": 3.16,
"learning_rate": 6.300522720632367e-05,
"loss": 0.8225,
"step": 1378
},
{
"epoch": 3.16,
"learning_rate": 6.286720260505668e-05,
"loss": 0.842,
"step": 1379
},
{
"epoch": 3.16,
"learning_rate": 6.2729260010118e-05,
"loss": 0.9227,
"step": 1380
},
{
"epoch": 3.16,
"learning_rate": 6.259139972614845e-05,
"loss": 0.8438,
"step": 1381
},
{
"epoch": 3.17,
"learning_rate": 6.245362205760704e-05,
"loss": 0.9213,
"step": 1382
},
{
"epoch": 3.17,
"learning_rate": 6.231592730877035e-05,
"loss": 0.7469,
"step": 1383
},
{
"epoch": 3.17,
"learning_rate": 6.217831578373185e-05,
"loss": 0.7289,
"step": 1384
},
{
"epoch": 3.17,
"learning_rate": 6.204078778640121e-05,
"loss": 0.8306,
"step": 1385
},
{
"epoch": 3.18,
"learning_rate": 6.190334362050365e-05,
"loss": 0.7807,
"step": 1386
},
{
"epoch": 3.18,
"learning_rate": 6.176598358957919e-05,
"loss": 0.7564,
"step": 1387
},
{
"epoch": 3.18,
"learning_rate": 6.162870799698209e-05,
"loss": 0.8306,
"step": 1388
},
{
"epoch": 3.18,
"learning_rate": 6.149151714588009e-05,
"loss": 0.7317,
"step": 1389
},
{
"epoch": 3.18,
"learning_rate": 6.135441133925382e-05,
"loss": 0.8923,
"step": 1390
},
{
"epoch": 3.19,
"learning_rate": 6.121739087989613e-05,
"loss": 0.7723,
"step": 1391
},
{
"epoch": 3.19,
"learning_rate": 6.108045607041125e-05,
"loss": 0.796,
"step": 1392
},
{
"epoch": 3.19,
"learning_rate": 6.0943607213214425e-05,
"loss": 0.7907,
"step": 1393
},
{
"epoch": 3.19,
"learning_rate": 6.0806844610530956e-05,
"loss": 0.7709,
"step": 1394
},
{
"epoch": 3.2,
"learning_rate": 6.0670168564395705e-05,
"loss": 0.8841,
"step": 1395
},
{
"epoch": 3.2,
"learning_rate": 6.053357937665237e-05,
"loss": 0.6325,
"step": 1396
},
{
"epoch": 3.2,
"learning_rate": 6.039707734895279e-05,
"loss": 0.8047,
"step": 1397
},
{
"epoch": 3.2,
"learning_rate": 6.0260662782756374e-05,
"loss": 0.7933,
"step": 1398
},
{
"epoch": 3.21,
"learning_rate": 6.012433597932936e-05,
"loss": 0.8016,
"step": 1399
},
{
"epoch": 3.21,
"learning_rate": 5.998809723974407e-05,
"loss": 0.8992,
"step": 1400
},
{
"epoch": 3.21,
"learning_rate": 5.985194686487854e-05,
"loss": 0.7384,
"step": 1401
},
{
"epoch": 3.21,
"learning_rate": 5.971588515541546e-05,
"loss": 0.7214,
"step": 1402
},
{
"epoch": 3.21,
"learning_rate": 5.957991241184184e-05,
"loss": 0.7394,
"step": 1403
},
{
"epoch": 3.22,
"learning_rate": 5.94440289344481e-05,
"loss": 0.6268,
"step": 1404
},
{
"epoch": 3.22,
"learning_rate": 5.9308235023327604e-05,
"loss": 0.8049,
"step": 1405
},
{
"epoch": 3.22,
"learning_rate": 5.9172530978375894e-05,
"loss": 0.8396,
"step": 1406
},
{
"epoch": 3.22,
"learning_rate": 5.9036917099290026e-05,
"loss": 0.7694,
"step": 1407
},
{
"epoch": 3.23,
"learning_rate": 5.890139368556791e-05,
"loss": 0.7289,
"step": 1408
},
{
"epoch": 3.23,
"learning_rate": 5.8765961036507736e-05,
"loss": 0.7949,
"step": 1409
},
{
"epoch": 3.23,
"learning_rate": 5.863061945120719e-05,
"loss": 0.9371,
"step": 1410
},
{
"epoch": 3.23,
"learning_rate": 5.8495369228562894e-05,
"loss": 0.7323,
"step": 1411
},
{
"epoch": 3.23,
"learning_rate": 5.836021066726962e-05,
"loss": 0.8331,
"step": 1412
},
{
"epoch": 3.24,
"learning_rate": 5.8225144065819745e-05,
"loss": 0.768,
"step": 1413
},
{
"epoch": 3.24,
"learning_rate": 5.809016972250263e-05,
"loss": 0.7804,
"step": 1414
},
{
"epoch": 3.24,
"learning_rate": 5.795528793540379e-05,
"loss": 0.771,
"step": 1415
},
{
"epoch": 3.24,
"learning_rate": 5.782049900240432e-05,
"loss": 0.7431,
"step": 1416
},
{
"epoch": 3.25,
"learning_rate": 5.768580322118034e-05,
"loss": 0.8618,
"step": 1417
},
{
"epoch": 3.25,
"learning_rate": 5.755120088920225e-05,
"loss": 0.7639,
"step": 1418
},
{
"epoch": 3.25,
"learning_rate": 5.7416692303733946e-05,
"loss": 0.8375,
"step": 1419
},
{
"epoch": 3.25,
"learning_rate": 5.728227776183244e-05,
"loss": 0.7409,
"step": 1420
},
{
"epoch": 3.26,
"learning_rate": 5.714795756034695e-05,
"loss": 0.7529,
"step": 1421
},
{
"epoch": 3.26,
"learning_rate": 5.701373199591835e-05,
"loss": 0.8878,
"step": 1422
},
{
"epoch": 3.26,
"learning_rate": 5.687960136497861e-05,
"loss": 0.6923,
"step": 1423
},
{
"epoch": 3.26,
"learning_rate": 5.6745565963749925e-05,
"loss": 0.8628,
"step": 1424
},
{
"epoch": 3.26,
"learning_rate": 5.6611626088244194e-05,
"loss": 0.6949,
"step": 1425
},
{
"epoch": 3.27,
"learning_rate": 5.6477782034262436e-05,
"loss": 0.7278,
"step": 1426
},
{
"epoch": 3.27,
"learning_rate": 5.634403409739402e-05,
"loss": 0.8781,
"step": 1427
},
{
"epoch": 3.27,
"learning_rate": 5.621038257301601e-05,
"loss": 0.7329,
"step": 1428
},
{
"epoch": 3.27,
"learning_rate": 5.6076827756292495e-05,
"loss": 0.7195,
"step": 1429
},
{
"epoch": 3.28,
"learning_rate": 5.594336994217415e-05,
"loss": 0.7283,
"step": 1430
},
{
"epoch": 3.28,
"learning_rate": 5.5810009425397294e-05,
"loss": 0.8064,
"step": 1431
},
{
"epoch": 3.28,
"learning_rate": 5.5676746500483336e-05,
"loss": 0.8488,
"step": 1432
},
{
"epoch": 3.28,
"learning_rate": 5.55435814617383e-05,
"loss": 0.8925,
"step": 1433
},
{
"epoch": 3.29,
"learning_rate": 5.5410514603251985e-05,
"loss": 0.7677,
"step": 1434
},
{
"epoch": 3.29,
"learning_rate": 5.5277546218897294e-05,
"loss": 0.8037,
"step": 1435
},
{
"epoch": 3.29,
"learning_rate": 5.514467660232965e-05,
"loss": 0.8046,
"step": 1436
},
{
"epoch": 3.29,
"learning_rate": 5.5011906046986473e-05,
"loss": 0.7885,
"step": 1437
},
{
"epoch": 3.29,
"learning_rate": 5.487923484608629e-05,
"loss": 0.8264,
"step": 1438
},
{
"epoch": 3.3,
"learning_rate": 5.4746663292628234e-05,
"loss": 0.7551,
"step": 1439
},
{
"epoch": 3.3,
"learning_rate": 5.4614191679391444e-05,
"loss": 0.8766,
"step": 1440
},
{
"epoch": 3.3,
"learning_rate": 5.448182029893423e-05,
"loss": 0.8992,
"step": 1441
},
{
"epoch": 3.3,
"learning_rate": 5.434954944359365e-05,
"loss": 0.6505,
"step": 1442
},
{
"epoch": 3.31,
"learning_rate": 5.4217379405484636e-05,
"loss": 0.8743,
"step": 1443
},
{
"epoch": 3.31,
"learning_rate": 5.408531047649964e-05,
"loss": 0.6965,
"step": 1444
},
{
"epoch": 3.31,
"learning_rate": 5.395334294830765e-05,
"loss": 0.7663,
"step": 1445
},
{
"epoch": 3.31,
"learning_rate": 5.382147711235377e-05,
"loss": 0.86,
"step": 1446
},
{
"epoch": 3.32,
"learning_rate": 5.3689713259858586e-05,
"loss": 0.7524,
"step": 1447
},
{
"epoch": 3.32,
"learning_rate": 5.355805168181738e-05,
"loss": 0.9115,
"step": 1448
},
{
"epoch": 3.32,
"learning_rate": 5.342649266899955e-05,
"loss": 0.8342,
"step": 1449
},
{
"epoch": 3.32,
"learning_rate": 5.329503651194805e-05,
"loss": 0.8447,
"step": 1450
},
{
"epoch": 3.32,
"learning_rate": 5.316368350097869e-05,
"loss": 0.7877,
"step": 1451
},
{
"epoch": 3.33,
"learning_rate": 5.3032433926179395e-05,
"loss": 0.7965,
"step": 1452
},
{
"epoch": 3.33,
"learning_rate": 5.290128807740976e-05,
"loss": 0.7844,
"step": 1453
},
{
"epoch": 3.33,
"learning_rate": 5.2770246244300224e-05,
"loss": 0.7405,
"step": 1454
},
{
"epoch": 3.33,
"learning_rate": 5.263930871625151e-05,
"loss": 0.7782,
"step": 1455
},
{
"epoch": 3.34,
"learning_rate": 5.2508475782434093e-05,
"loss": 0.7789,
"step": 1456
},
{
"epoch": 3.34,
"learning_rate": 5.237774773178734e-05,
"loss": 0.8943,
"step": 1457
},
{
"epoch": 3.34,
"learning_rate": 5.224712485301898e-05,
"loss": 0.7712,
"step": 1458
},
{
"epoch": 3.34,
"learning_rate": 5.211660743460458e-05,
"loss": 0.8608,
"step": 1459
},
{
"epoch": 3.34,
"learning_rate": 5.198619576478678e-05,
"loss": 0.7212,
"step": 1460
},
{
"epoch": 3.35,
"learning_rate": 5.1855890131574614e-05,
"loss": 0.7588,
"step": 1461
},
{
"epoch": 3.35,
"learning_rate": 5.17256908227429e-05,
"loss": 0.8001,
"step": 1462
},
{
"epoch": 3.35,
"learning_rate": 5.159559812583181e-05,
"loss": 0.8327,
"step": 1463
},
{
"epoch": 3.35,
"learning_rate": 5.146561232814593e-05,
"loss": 0.8874,
"step": 1464
},
{
"epoch": 3.36,
"learning_rate": 5.133573371675375e-05,
"loss": 0.6802,
"step": 1465
},
{
"epoch": 3.36,
"learning_rate": 5.1205962578487155e-05,
"loss": 0.7581,
"step": 1466
},
{
"epoch": 3.36,
"learning_rate": 5.1076299199940645e-05,
"loss": 0.8714,
"step": 1467
},
{
"epoch": 3.36,
"learning_rate": 5.094674386747067e-05,
"loss": 0.6667,
"step": 1468
},
{
"epoch": 3.37,
"learning_rate": 5.081729686719508e-05,
"loss": 0.8107,
"step": 1469
},
{
"epoch": 3.37,
"learning_rate": 5.068795848499257e-05,
"loss": 0.8891,
"step": 1470
},
{
"epoch": 3.37,
"learning_rate": 5.0558729006501846e-05,
"loss": 0.7259,
"step": 1471
},
{
"epoch": 3.37,
"learning_rate": 5.042960871712112e-05,
"loss": 0.8035,
"step": 1472
},
{
"epoch": 3.37,
"learning_rate": 5.030059790200756e-05,
"loss": 0.7042,
"step": 1473
},
{
"epoch": 3.38,
"learning_rate": 5.0171696846076446e-05,
"loss": 0.7852,
"step": 1474
},
{
"epoch": 3.38,
"learning_rate": 5.004290583400075e-05,
"loss": 0.8489,
"step": 1475
},
{
"epoch": 3.38,
"learning_rate": 4.9914225150210335e-05,
"loss": 0.7696,
"step": 1476
},
{
"epoch": 3.38,
"learning_rate": 4.97856550788915e-05,
"loss": 0.7,
"step": 1477
},
{
"epoch": 3.39,
"learning_rate": 4.9657195903986185e-05,
"loss": 0.8373,
"step": 1478
},
{
"epoch": 3.39,
"learning_rate": 4.952884790919141e-05,
"loss": 0.8822,
"step": 1479
},
{
"epoch": 3.39,
"learning_rate": 4.940061137795876e-05,
"loss": 0.7292,
"step": 1480
},
{
"epoch": 3.39,
"learning_rate": 4.927248659349355e-05,
"loss": 0.8165,
"step": 1481
},
{
"epoch": 3.4,
"learning_rate": 4.914447383875432e-05,
"loss": 0.7782,
"step": 1482
},
{
"epoch": 3.4,
"learning_rate": 4.901657339645226e-05,
"loss": 0.8172,
"step": 1483
},
{
"epoch": 3.4,
"learning_rate": 4.888878554905051e-05,
"loss": 0.8072,
"step": 1484
},
{
"epoch": 3.4,
"learning_rate": 4.876111057876347e-05,
"loss": 0.7715,
"step": 1485
},
{
"epoch": 3.4,
"learning_rate": 4.863354876755637e-05,
"loss": 0.7384,
"step": 1486
},
{
"epoch": 3.41,
"learning_rate": 4.850610039714444e-05,
"loss": 0.7881,
"step": 1487
},
{
"epoch": 3.41,
"learning_rate": 4.837876574899237e-05,
"loss": 0.7962,
"step": 1488
},
{
"epoch": 3.41,
"learning_rate": 4.8251545104313836e-05,
"loss": 0.5635,
"step": 1489
},
{
"epoch": 3.41,
"learning_rate": 4.812443874407059e-05,
"loss": 0.7454,
"step": 1490
},
{
"epoch": 3.42,
"learning_rate": 4.7997446948972015e-05,
"loss": 0.8505,
"step": 1491
},
{
"epoch": 3.42,
"learning_rate": 4.787056999947455e-05,
"loss": 0.6157,
"step": 1492
},
{
"epoch": 3.42,
"learning_rate": 4.774380817578101e-05,
"loss": 0.7731,
"step": 1493
},
{
"epoch": 3.42,
"learning_rate": 4.761716175783989e-05,
"loss": 0.8062,
"step": 1494
},
{
"epoch": 3.42,
"learning_rate": 4.74906310253448e-05,
"loss": 0.7027,
"step": 1495
},
{
"epoch": 3.43,
"learning_rate": 4.736421625773396e-05,
"loss": 0.7,
"step": 1496
},
{
"epoch": 3.43,
"learning_rate": 4.723791773418942e-05,
"loss": 0.7822,
"step": 1497
},
{
"epoch": 3.43,
"learning_rate": 4.7111735733636466e-05,
"loss": 0.6308,
"step": 1498
},
{
"epoch": 3.43,
"learning_rate": 4.698567053474315e-05,
"loss": 0.6722,
"step": 1499
},
{
"epoch": 3.44,
"learning_rate": 4.685972241591956e-05,
"loss": 0.749,
"step": 1500
},
{
"epoch": 3.44,
"learning_rate": 4.673389165531714e-05,
"loss": 0.7784,
"step": 1501
},
{
"epoch": 3.44,
"learning_rate": 4.6608178530828174e-05,
"loss": 0.7971,
"step": 1502
},
{
"epoch": 3.44,
"learning_rate": 4.648258332008523e-05,
"loss": 0.8398,
"step": 1503
},
{
"epoch": 3.45,
"learning_rate": 4.6357106300460374e-05,
"loss": 0.6559,
"step": 1504
},
{
"epoch": 3.45,
"learning_rate": 4.6231747749064644e-05,
"loss": 0.7837,
"step": 1505
},
{
"epoch": 3.45,
"learning_rate": 4.610650794274759e-05,
"loss": 0.8072,
"step": 1506
},
{
"epoch": 3.45,
"learning_rate": 4.598138715809633e-05,
"loss": 0.7441,
"step": 1507
},
{
"epoch": 3.45,
"learning_rate": 4.585638567143529e-05,
"loss": 0.8233,
"step": 1508
},
{
"epoch": 3.46,
"learning_rate": 4.573150375882527e-05,
"loss": 0.8868,
"step": 1509
},
{
"epoch": 3.46,
"learning_rate": 4.560674169606317e-05,
"loss": 0.7059,
"step": 1510
},
{
"epoch": 3.46,
"learning_rate": 4.548209975868108e-05,
"loss": 0.8349,
"step": 1511
},
{
"epoch": 3.46,
"learning_rate": 4.5357578221945794e-05,
"loss": 0.817,
"step": 1512
},
{
"epoch": 3.47,
"learning_rate": 4.523317736085831e-05,
"loss": 0.7375,
"step": 1513
},
{
"epoch": 3.47,
"learning_rate": 4.5108897450153054e-05,
"loss": 0.8338,
"step": 1514
},
{
"epoch": 3.47,
"learning_rate": 4.498473876429726e-05,
"loss": 0.9212,
"step": 1515
},
{
"epoch": 3.47,
"learning_rate": 4.4860701577490595e-05,
"loss": 0.7182,
"step": 1516
},
{
"epoch": 3.48,
"learning_rate": 4.473678616366433e-05,
"loss": 0.8677,
"step": 1517
},
{
"epoch": 3.48,
"learning_rate": 4.461299279648077e-05,
"loss": 0.7868,
"step": 1518
},
{
"epoch": 3.48,
"learning_rate": 4.4489321749332744e-05,
"loss": 0.7078,
"step": 1519
},
{
"epoch": 3.48,
"learning_rate": 4.436577329534291e-05,
"loss": 0.6872,
"step": 1520
},
{
"epoch": 3.48,
"learning_rate": 4.424234770736314e-05,
"loss": 0.7523,
"step": 1521
},
{
"epoch": 3.49,
"learning_rate": 4.411904525797408e-05,
"loss": 0.7107,
"step": 1522
},
{
"epoch": 3.49,
"learning_rate": 4.3995866219484326e-05,
"loss": 0.8932,
"step": 1523
},
{
"epoch": 3.49,
"learning_rate": 4.387281086392994e-05,
"loss": 0.7811,
"step": 1524
},
{
"epoch": 3.49,
"learning_rate": 4.374987946307385e-05,
"loss": 0.8946,
"step": 1525
},
{
"epoch": 3.5,
"learning_rate": 4.362707228840531e-05,
"loss": 0.8496,
"step": 1526
},
{
"epoch": 3.5,
"learning_rate": 4.350438961113911e-05,
"loss": 0.6998,
"step": 1527
},
{
"epoch": 3.5,
"learning_rate": 4.3381831702215084e-05,
"loss": 0.6792,
"step": 1528
},
{
"epoch": 3.5,
"learning_rate": 4.325939883229766e-05,
"loss": 0.7644,
"step": 1529
},
{
"epoch": 3.51,
"learning_rate": 4.3137091271775e-05,
"loss": 0.6055,
"step": 1530
},
{
"epoch": 3.51,
"learning_rate": 4.301490929075852e-05,
"loss": 0.7126,
"step": 1531
},
{
"epoch": 3.51,
"learning_rate": 4.289285315908237e-05,
"loss": 0.7635,
"step": 1532
},
{
"epoch": 3.51,
"learning_rate": 4.277092314630278e-05,
"loss": 0.9089,
"step": 1533
},
{
"epoch": 3.51,
"learning_rate": 4.264911952169735e-05,
"loss": 0.7267,
"step": 1534
},
{
"epoch": 3.52,
"learning_rate": 4.2527442554264605e-05,
"loss": 0.6774,
"step": 1535
},
{
"epoch": 3.52,
"learning_rate": 4.240589251272342e-05,
"loss": 0.8402,
"step": 1536
},
{
"epoch": 3.52,
"learning_rate": 4.228446966551226e-05,
"loss": 0.8603,
"step": 1537
},
{
"epoch": 3.52,
"learning_rate": 4.2163174280788697e-05,
"loss": 0.6459,
"step": 1538
},
{
"epoch": 3.53,
"learning_rate": 4.2042006626428906e-05,
"loss": 0.7192,
"step": 1539
},
{
"epoch": 3.53,
"learning_rate": 4.192096697002686e-05,
"loss": 0.8621,
"step": 1540
},
{
"epoch": 3.53,
"learning_rate": 4.1800055578893883e-05,
"loss": 0.8194,
"step": 1541
},
{
"epoch": 3.53,
"learning_rate": 4.167927272005805e-05,
"loss": 0.8702,
"step": 1542
},
{
"epoch": 3.53,
"learning_rate": 4.155861866026364e-05,
"loss": 0.8677,
"step": 1543
},
{
"epoch": 3.54,
"learning_rate": 4.143809366597037e-05,
"loss": 0.7971,
"step": 1544
},
{
"epoch": 3.54,
"learning_rate": 4.131769800335292e-05,
"loss": 0.7896,
"step": 1545
},
{
"epoch": 3.54,
"learning_rate": 4.119743193830048e-05,
"loss": 0.889,
"step": 1546
},
{
"epoch": 3.54,
"learning_rate": 4.10772957364159e-05,
"loss": 0.7497,
"step": 1547
},
{
"epoch": 3.55,
"learning_rate": 4.0957289663015255e-05,
"loss": 0.9096,
"step": 1548
},
{
"epoch": 3.55,
"learning_rate": 4.083741398312727e-05,
"loss": 0.8658,
"step": 1549
},
{
"epoch": 3.55,
"learning_rate": 4.071766896149273e-05,
"loss": 0.5634,
"step": 1550
},
{
"epoch": 3.55,
"learning_rate": 4.059805486256376e-05,
"loss": 0.6693,
"step": 1551
},
{
"epoch": 3.56,
"learning_rate": 4.0478571950503486e-05,
"loss": 0.7128,
"step": 1552
},
{
"epoch": 3.56,
"learning_rate": 4.035922048918519e-05,
"loss": 0.7838,
"step": 1553
},
{
"epoch": 3.56,
"learning_rate": 4.024000074219187e-05,
"loss": 0.9549,
"step": 1554
},
{
"epoch": 3.56,
"learning_rate": 4.012091297281574e-05,
"loss": 0.6245,
"step": 1555
},
{
"epoch": 3.56,
"learning_rate": 4.0001957444057426e-05,
"loss": 0.7671,
"step": 1556
},
{
"epoch": 3.57,
"learning_rate": 3.988313441862553e-05,
"loss": 0.6645,
"step": 1557
},
{
"epoch": 3.57,
"learning_rate": 3.976444415893608e-05,
"loss": 0.8291,
"step": 1558
},
{
"epoch": 3.57,
"learning_rate": 3.96458869271119e-05,
"loss": 0.8715,
"step": 1559
},
{
"epoch": 3.57,
"learning_rate": 3.952746298498195e-05,
"loss": 0.8423,
"step": 1560
},
{
"epoch": 3.58,
"learning_rate": 3.940917259408085e-05,
"loss": 0.8303,
"step": 1561
},
{
"epoch": 3.58,
"learning_rate": 3.929101601564834e-05,
"loss": 0.7876,
"step": 1562
},
{
"epoch": 3.58,
"learning_rate": 3.9172993510628574e-05,
"loss": 0.7409,
"step": 1563
},
{
"epoch": 3.58,
"learning_rate": 3.9055105339669595e-05,
"loss": 0.8988,
"step": 1564
},
{
"epoch": 3.59,
"learning_rate": 3.8937351763122845e-05,
"loss": 1.0367,
"step": 1565
},
{
"epoch": 3.59,
"learning_rate": 3.8819733041042515e-05,
"loss": 0.682,
"step": 1566
},
{
"epoch": 3.59,
"learning_rate": 3.870224943318491e-05,
"loss": 0.815,
"step": 1567
},
{
"epoch": 3.59,
"learning_rate": 3.858490119900794e-05,
"loss": 0.6516,
"step": 1568
},
{
"epoch": 3.59,
"learning_rate": 3.846768859767066e-05,
"loss": 0.7371,
"step": 1569
},
{
"epoch": 3.6,
"learning_rate": 3.8350611888032474e-05,
"loss": 0.7401,
"step": 1570
},
{
"epoch": 3.6,
"learning_rate": 3.823367132865265e-05,
"loss": 0.7305,
"step": 1571
},
{
"epoch": 3.6,
"learning_rate": 3.8116867177789936e-05,
"loss": 0.7422,
"step": 1572
},
{
"epoch": 3.6,
"learning_rate": 3.8000199693401675e-05,
"loss": 0.7621,
"step": 1573
},
{
"epoch": 3.61,
"learning_rate": 3.788366913314339e-05,
"loss": 0.935,
"step": 1574
},
{
"epoch": 3.61,
"learning_rate": 3.776727575436829e-05,
"loss": 0.7587,
"step": 1575
},
{
"epoch": 3.61,
"learning_rate": 3.7651019814126654e-05,
"loss": 0.9029,
"step": 1576
},
{
"epoch": 3.61,
"learning_rate": 3.753490156916511e-05,
"loss": 0.8324,
"step": 1577
},
{
"epoch": 3.62,
"learning_rate": 3.741892127592625e-05,
"loss": 0.7316,
"step": 1578
},
{
"epoch": 3.62,
"learning_rate": 3.730307919054803e-05,
"loss": 0.684,
"step": 1579
},
{
"epoch": 3.62,
"learning_rate": 3.718737556886316e-05,
"loss": 0.7547,
"step": 1580
},
{
"epoch": 3.62,
"learning_rate": 3.7071810666398496e-05,
"loss": 0.8581,
"step": 1581
},
{
"epoch": 3.62,
"learning_rate": 3.695638473837466e-05,
"loss": 0.7707,
"step": 1582
},
{
"epoch": 3.63,
"learning_rate": 3.684109803970531e-05,
"loss": 0.755,
"step": 1583
},
{
"epoch": 3.63,
"learning_rate": 3.6725950824996535e-05,
"loss": 0.8436,
"step": 1584
},
{
"epoch": 3.63,
"learning_rate": 3.6610943348546526e-05,
"loss": 0.7491,
"step": 1585
},
{
"epoch": 3.63,
"learning_rate": 3.649607586434474e-05,
"loss": 0.6946,
"step": 1586
},
{
"epoch": 3.64,
"learning_rate": 3.6381348626071475e-05,
"loss": 0.7697,
"step": 1587
},
{
"epoch": 3.64,
"learning_rate": 3.626676188709743e-05,
"loss": 0.8108,
"step": 1588
},
{
"epoch": 3.64,
"learning_rate": 3.6152315900482905e-05,
"loss": 0.7676,
"step": 1589
},
{
"epoch": 3.64,
"learning_rate": 3.603801091897731e-05,
"loss": 0.8506,
"step": 1590
},
{
"epoch": 3.64,
"learning_rate": 3.592384719501878e-05,
"loss": 0.7521,
"step": 1591
},
{
"epoch": 3.65,
"learning_rate": 3.580982498073344e-05,
"loss": 0.8371,
"step": 1592
},
{
"epoch": 3.65,
"learning_rate": 3.5695944527934865e-05,
"loss": 0.816,
"step": 1593
},
{
"epoch": 3.65,
"learning_rate": 3.5582206088123535e-05,
"loss": 0.7097,
"step": 1594
},
{
"epoch": 3.65,
"learning_rate": 3.546860991248641e-05,
"loss": 0.7147,
"step": 1595
},
{
"epoch": 3.66,
"learning_rate": 3.5355156251896136e-05,
"loss": 0.7807,
"step": 1596
},
{
"epoch": 3.66,
"learning_rate": 3.524184535691068e-05,
"loss": 0.8517,
"step": 1597
},
{
"epoch": 3.66,
"learning_rate": 3.5128677477772734e-05,
"loss": 0.8549,
"step": 1598
},
{
"epoch": 3.66,
"learning_rate": 3.501565286440914e-05,
"loss": 0.7514,
"step": 1599
},
{
"epoch": 3.67,
"learning_rate": 3.490277176643033e-05,
"loss": 0.8055,
"step": 1600
},
{
"epoch": 3.67,
"learning_rate": 3.4790034433129725e-05,
"loss": 0.5494,
"step": 1601
},
{
"epoch": 3.67,
"learning_rate": 3.467744111348338e-05,
"loss": 0.9018,
"step": 1602
},
{
"epoch": 3.67,
"learning_rate": 3.4564992056149214e-05,
"loss": 0.7319,
"step": 1603
},
{
"epoch": 3.67,
"learning_rate": 3.445268750946651e-05,
"loss": 0.8997,
"step": 1604
},
{
"epoch": 3.68,
"learning_rate": 3.434052772145554e-05,
"loss": 0.7977,
"step": 1605
},
{
"epoch": 3.68,
"learning_rate": 3.422851293981676e-05,
"loss": 0.7205,
"step": 1606
},
{
"epoch": 3.68,
"learning_rate": 3.411664341193041e-05,
"loss": 0.848,
"step": 1607
},
{
"epoch": 3.68,
"learning_rate": 3.400491938485596e-05,
"loss": 0.7864,
"step": 1608
},
{
"epoch": 3.69,
"learning_rate": 3.389334110533161e-05,
"loss": 0.7184,
"step": 1609
},
{
"epoch": 3.69,
"learning_rate": 3.378190881977359e-05,
"loss": 0.8362,
"step": 1610
},
{
"epoch": 3.69,
"learning_rate": 3.367062277427567e-05,
"loss": 0.6743,
"step": 1611
},
{
"epoch": 3.69,
"learning_rate": 3.3559483214608824e-05,
"loss": 0.7561,
"step": 1612
},
{
"epoch": 3.7,
"learning_rate": 3.3448490386220355e-05,
"loss": 0.7342,
"step": 1613
},
{
"epoch": 3.7,
"learning_rate": 3.333764453423357e-05,
"loss": 0.7918,
"step": 1614
},
{
"epoch": 3.7,
"learning_rate": 3.322694590344719e-05,
"loss": 0.75,
"step": 1615
},
{
"epoch": 3.7,
"learning_rate": 3.3116394738334866e-05,
"loss": 0.7874,
"step": 1616
},
{
"epoch": 3.7,
"learning_rate": 3.300599128304443e-05,
"loss": 0.7555,
"step": 1617
},
{
"epoch": 3.71,
"learning_rate": 3.2895735781397685e-05,
"loss": 0.8434,
"step": 1618
},
{
"epoch": 3.71,
"learning_rate": 3.278562847688951e-05,
"loss": 0.8756,
"step": 1619
},
{
"epoch": 3.71,
"learning_rate": 3.2675669612687565e-05,
"loss": 0.8765,
"step": 1620
},
{
"epoch": 3.71,
"learning_rate": 3.256585943163176e-05,
"loss": 0.8501,
"step": 1621
},
{
"epoch": 3.72,
"learning_rate": 3.2456198176233543e-05,
"loss": 1.0232,
"step": 1622
},
{
"epoch": 3.72,
"learning_rate": 3.234668608867547e-05,
"loss": 0.7117,
"step": 1623
},
{
"epoch": 3.72,
"learning_rate": 3.2237323410810715e-05,
"loss": 0.9795,
"step": 1624
},
{
"epoch": 3.72,
"learning_rate": 3.212811038416251e-05,
"loss": 0.887,
"step": 1625
},
{
"epoch": 3.73,
"learning_rate": 3.201904724992352e-05,
"loss": 0.7008,
"step": 1626
},
{
"epoch": 3.73,
"learning_rate": 3.191013424895536e-05,
"loss": 0.7542,
"step": 1627
},
{
"epoch": 3.73,
"learning_rate": 3.18013716217882e-05,
"loss": 0.871,
"step": 1628
},
{
"epoch": 3.73,
"learning_rate": 3.1692759608620004e-05,
"loss": 0.7761,
"step": 1629
},
{
"epoch": 3.73,
"learning_rate": 3.158429844931611e-05,
"loss": 0.842,
"step": 1630
},
{
"epoch": 3.74,
"learning_rate": 3.1475988383408774e-05,
"loss": 0.8322,
"step": 1631
},
{
"epoch": 3.74,
"learning_rate": 3.136782965009658e-05,
"loss": 0.7911,
"step": 1632
},
{
"epoch": 3.74,
"learning_rate": 3.1259822488243806e-05,
"loss": 0.8911,
"step": 1633
},
{
"epoch": 3.74,
"learning_rate": 3.115196713638e-05,
"loss": 0.9232,
"step": 1634
},
{
"epoch": 3.75,
"learning_rate": 3.104426383269957e-05,
"loss": 0.8265,
"step": 1635
},
{
"epoch": 3.75,
"learning_rate": 3.093671281506099e-05,
"loss": 0.7861,
"step": 1636
},
{
"epoch": 3.75,
"learning_rate": 3.0829314320986433e-05,
"loss": 0.6548,
"step": 1637
},
{
"epoch": 3.75,
"learning_rate": 3.072206858766134e-05,
"loss": 0.7974,
"step": 1638
},
{
"epoch": 3.75,
"learning_rate": 3.061497585193369e-05,
"loss": 0.849,
"step": 1639
},
{
"epoch": 3.76,
"learning_rate": 3.050803635031355e-05,
"loss": 0.7438,
"step": 1640
},
{
"epoch": 3.76,
"learning_rate": 3.040125031897264e-05,
"loss": 0.838,
"step": 1641
},
{
"epoch": 3.76,
"learning_rate": 3.029461799374378e-05,
"loss": 0.8879,
"step": 1642
},
{
"epoch": 3.76,
"learning_rate": 3.0188139610120248e-05,
"loss": 0.7747,
"step": 1643
},
{
"epoch": 3.77,
"learning_rate": 3.0081815403255332e-05,
"loss": 0.7179,
"step": 1644
},
{
"epoch": 3.77,
"learning_rate": 2.9975645607961955e-05,
"loss": 0.7618,
"step": 1645
},
{
"epoch": 3.77,
"learning_rate": 2.9869630458711927e-05,
"loss": 0.6977,
"step": 1646
},
{
"epoch": 3.77,
"learning_rate": 2.9763770189635497e-05,
"loss": 0.8052,
"step": 1647
},
{
"epoch": 3.78,
"learning_rate": 2.9658065034520978e-05,
"loss": 0.728,
"step": 1648
},
{
"epoch": 3.78,
"learning_rate": 2.955251522681408e-05,
"loss": 0.8593,
"step": 1649
},
{
"epoch": 3.78,
"learning_rate": 2.944712099961736e-05,
"loss": 0.8347,
"step": 1650
},
{
"epoch": 3.78,
"learning_rate": 2.9341882585689905e-05,
"loss": 0.733,
"step": 1651
},
{
"epoch": 3.78,
"learning_rate": 2.9236800217446593e-05,
"loss": 0.6998,
"step": 1652
},
{
"epoch": 3.79,
"learning_rate": 2.9131874126957727e-05,
"loss": 0.8923,
"step": 1653
},
{
"epoch": 3.79,
"learning_rate": 2.9027104545948414e-05,
"loss": 0.5376,
"step": 1654
},
{
"epoch": 3.79,
"learning_rate": 2.892249170579826e-05,
"loss": 0.7465,
"step": 1655
},
{
"epoch": 3.79,
"learning_rate": 2.8818035837540537e-05,
"loss": 0.7833,
"step": 1656
},
{
"epoch": 3.8,
"learning_rate": 2.8713737171861986e-05,
"loss": 0.7611,
"step": 1657
},
{
"epoch": 3.8,
"learning_rate": 2.8609595939102153e-05,
"loss": 0.7226,
"step": 1658
},
{
"epoch": 3.8,
"learning_rate": 2.8505612369252832e-05,
"loss": 0.8847,
"step": 1659
},
{
"epoch": 3.8,
"learning_rate": 2.840178669195763e-05,
"loss": 0.7511,
"step": 1660
},
{
"epoch": 3.81,
"learning_rate": 2.8298119136511558e-05,
"loss": 0.6833,
"step": 1661
},
{
"epoch": 3.81,
"learning_rate": 2.8194609931860316e-05,
"loss": 0.7595,
"step": 1662
},
{
"epoch": 3.81,
"learning_rate": 2.8091259306599904e-05,
"loss": 0.7486,
"step": 1663
},
{
"epoch": 3.81,
"learning_rate": 2.7988067488976156e-05,
"loss": 0.8106,
"step": 1664
},
{
"epoch": 3.81,
"learning_rate": 2.7885034706884185e-05,
"loss": 0.8012,
"step": 1665
},
{
"epoch": 3.82,
"learning_rate": 2.7782161187867818e-05,
"loss": 0.7598,
"step": 1666
},
{
"epoch": 3.82,
"learning_rate": 2.7679447159119164e-05,
"loss": 0.6638,
"step": 1667
},
{
"epoch": 3.82,
"learning_rate": 2.7576892847478207e-05,
"loss": 0.6576,
"step": 1668
},
{
"epoch": 3.82,
"learning_rate": 2.7474498479432087e-05,
"loss": 0.8174,
"step": 1669
},
{
"epoch": 3.83,
"learning_rate": 2.737226428111471e-05,
"loss": 0.868,
"step": 1670
},
{
"epoch": 3.83,
"learning_rate": 2.7270190478306378e-05,
"loss": 0.6411,
"step": 1671
},
{
"epoch": 3.83,
"learning_rate": 2.7168277296433053e-05,
"loss": 0.7872,
"step": 1672
},
{
"epoch": 3.83,
"learning_rate": 2.7066524960565965e-05,
"loss": 0.7556,
"step": 1673
},
{
"epoch": 3.84,
"learning_rate": 2.6964933695421192e-05,
"loss": 0.8606,
"step": 1674
},
{
"epoch": 3.84,
"learning_rate": 2.6863503725359107e-05,
"loss": 0.7776,
"step": 1675
},
{
"epoch": 3.84,
"learning_rate": 2.6762235274383772e-05,
"loss": 0.7095,
"step": 1676
},
{
"epoch": 3.84,
"learning_rate": 2.666112856614259e-05,
"loss": 0.8587,
"step": 1677
},
{
"epoch": 3.84,
"learning_rate": 2.65601838239258e-05,
"loss": 0.8568,
"step": 1678
},
{
"epoch": 3.85,
"learning_rate": 2.6459401270665894e-05,
"loss": 0.7725,
"step": 1679
},
{
"epoch": 3.85,
"learning_rate": 2.6358781128937172e-05,
"loss": 0.8665,
"step": 1680
},
{
"epoch": 3.85,
"learning_rate": 2.625832362095528e-05,
"loss": 0.8286,
"step": 1681
},
{
"epoch": 3.85,
"learning_rate": 2.6158028968576743e-05,
"loss": 0.9445,
"step": 1682
},
{
"epoch": 3.86,
"learning_rate": 2.6057897393298324e-05,
"loss": 0.7562,
"step": 1683
},
{
"epoch": 3.86,
"learning_rate": 2.5957929116256675e-05,
"loss": 0.8086,
"step": 1684
},
{
"epoch": 3.86,
"learning_rate": 2.5858124358227853e-05,
"loss": 0.8513,
"step": 1685
},
{
"epoch": 3.86,
"learning_rate": 2.5758483339626738e-05,
"loss": 0.7107,
"step": 1686
},
{
"epoch": 3.86,
"learning_rate": 2.565900628050659e-05,
"loss": 0.7926,
"step": 1687
},
{
"epoch": 3.87,
"learning_rate": 2.5559693400558658e-05,
"loss": 0.7839,
"step": 1688
},
{
"epoch": 3.87,
"learning_rate": 2.546054491911147e-05,
"loss": 0.8132,
"step": 1689
},
{
"epoch": 3.87,
"learning_rate": 2.536156105513062e-05,
"loss": 0.6755,
"step": 1690
},
{
"epoch": 3.87,
"learning_rate": 2.52627420272181e-05,
"loss": 0.7823,
"step": 1691
},
{
"epoch": 3.88,
"learning_rate": 2.5164088053611845e-05,
"loss": 0.8078,
"step": 1692
},
{
"epoch": 3.88,
"learning_rate": 2.5065599352185254e-05,
"loss": 0.7328,
"step": 1693
},
{
"epoch": 3.88,
"learning_rate": 2.4967276140446826e-05,
"loss": 0.9089,
"step": 1694
},
{
"epoch": 3.88,
"learning_rate": 2.48691186355395e-05,
"loss": 0.7683,
"step": 1695
},
{
"epoch": 3.89,
"learning_rate": 2.477112705424024e-05,
"loss": 0.7681,
"step": 1696
},
{
"epoch": 3.89,
"learning_rate": 2.4673301612959654e-05,
"loss": 0.8331,
"step": 1697
},
{
"epoch": 3.89,
"learning_rate": 2.4575642527741415e-05,
"loss": 0.7678,
"step": 1698
},
{
"epoch": 3.89,
"learning_rate": 2.447815001426177e-05,
"loss": 0.7815,
"step": 1699
},
{
"epoch": 3.89,
"learning_rate": 2.4380824287829074e-05,
"loss": 0.9155,
"step": 1700
},
{
"epoch": 3.9,
"learning_rate": 2.428366556338344e-05,
"loss": 0.7475,
"step": 1701
},
{
"epoch": 3.9,
"learning_rate": 2.4186674055496083e-05,
"loss": 0.6909,
"step": 1702
},
{
"epoch": 3.9,
"learning_rate": 2.4089849978368918e-05,
"loss": 0.7278,
"step": 1703
},
{
"epoch": 3.9,
"learning_rate": 2.399319354583418e-05,
"loss": 0.8053,
"step": 1704
},
{
"epoch": 3.91,
"learning_rate": 2.389670497135379e-05,
"loss": 0.6703,
"step": 1705
},
{
"epoch": 3.91,
"learning_rate": 2.3800384468018954e-05,
"loss": 0.7334,
"step": 1706
},
{
"epoch": 3.91,
"learning_rate": 2.370423224854975e-05,
"loss": 0.7021,
"step": 1707
},
{
"epoch": 3.91,
"learning_rate": 2.3608248525294628e-05,
"loss": 0.7711,
"step": 1708
},
{
"epoch": 3.92,
"learning_rate": 2.3512433510229858e-05,
"loss": 0.8555,
"step": 1709
},
{
"epoch": 3.92,
"learning_rate": 2.3416787414959097e-05,
"loss": 0.7019,
"step": 1710
},
{
"epoch": 3.92,
"learning_rate": 2.3321310450713062e-05,
"loss": 0.9331,
"step": 1711
},
{
"epoch": 3.92,
"learning_rate": 2.322600282834888e-05,
"loss": 0.7915,
"step": 1712
},
{
"epoch": 3.92,
"learning_rate": 2.3130864758349645e-05,
"loss": 0.8168,
"step": 1713
},
{
"epoch": 3.93,
"learning_rate": 2.303589645082411e-05,
"loss": 0.7711,
"step": 1714
},
{
"epoch": 3.93,
"learning_rate": 2.2941098115506065e-05,
"loss": 0.7319,
"step": 1715
},
{
"epoch": 3.93,
"learning_rate": 2.2846469961753915e-05,
"loss": 0.7473,
"step": 1716
},
{
"epoch": 3.93,
"learning_rate": 2.27520121985502e-05,
"loss": 0.7365,
"step": 1717
},
{
"epoch": 3.94,
"learning_rate": 2.265772503450122e-05,
"loss": 0.9078,
"step": 1718
},
{
"epoch": 3.94,
"learning_rate": 2.256360867783648e-05,
"loss": 0.6878,
"step": 1719
},
{
"epoch": 3.94,
"learning_rate": 2.246966333640823e-05,
"loss": 0.7913,
"step": 1720
},
{
"epoch": 3.94,
"learning_rate": 2.2375889217691137e-05,
"loss": 0.8684,
"step": 1721
},
{
"epoch": 3.95,
"learning_rate": 2.2282286528781605e-05,
"loss": 0.7516,
"step": 1722
},
{
"epoch": 3.95,
"learning_rate": 2.218885547639754e-05,
"loss": 0.787,
"step": 1723
},
{
"epoch": 3.95,
"learning_rate": 2.2095596266877782e-05,
"loss": 0.801,
"step": 1724
},
{
"epoch": 3.95,
"learning_rate": 2.2002509106181624e-05,
"loss": 0.8423,
"step": 1725
},
{
"epoch": 3.95,
"learning_rate": 2.1909594199888372e-05,
"loss": 0.6984,
"step": 1726
},
{
"epoch": 3.96,
"learning_rate": 2.181685175319702e-05,
"loss": 0.7593,
"step": 1727
},
{
"epoch": 3.96,
"learning_rate": 2.172428197092561e-05,
"loss": 0.7661,
"step": 1728
},
{
"epoch": 3.96,
"learning_rate": 2.1631885057510838e-05,
"loss": 0.8231,
"step": 1729
},
{
"epoch": 3.96,
"learning_rate": 2.153966121700769e-05,
"loss": 0.7426,
"step": 1730
},
{
"epoch": 3.97,
"learning_rate": 2.1447610653088947e-05,
"loss": 0.7836,
"step": 1731
},
{
"epoch": 3.97,
"learning_rate": 2.1355733569044635e-05,
"loss": 0.9467,
"step": 1732
},
{
"epoch": 3.97,
"learning_rate": 2.126403016778168e-05,
"loss": 0.8632,
"step": 1733
},
{
"epoch": 3.97,
"learning_rate": 2.117250065182349e-05,
"loss": 0.8532,
"step": 1734
},
{
"epoch": 3.97,
"learning_rate": 2.1081145223309395e-05,
"loss": 0.769,
"step": 1735
},
{
"epoch": 3.98,
"learning_rate": 2.0989964083994252e-05,
"loss": 0.6967,
"step": 1736
},
{
"epoch": 3.98,
"learning_rate": 2.08989574352481e-05,
"loss": 0.7737,
"step": 1737
},
{
"epoch": 3.98,
"learning_rate": 2.0808125478055505e-05,
"loss": 0.5646,
"step": 1738
},
{
"epoch": 3.98,
"learning_rate": 2.0717468413015283e-05,
"loss": 0.7515,
"step": 1739
},
{
"epoch": 3.99,
"learning_rate": 2.0626986440340035e-05,
"loss": 0.718,
"step": 1740
},
{
"epoch": 3.99,
"learning_rate": 2.053667975985567e-05,
"loss": 0.8102,
"step": 1741
},
{
"epoch": 3.99,
"learning_rate": 2.0446548571000935e-05,
"loss": 0.8485,
"step": 1742
},
{
"epoch": 3.99,
"learning_rate": 2.035659307282699e-05,
"loss": 0.7086,
"step": 1743
},
{
"epoch": 4.0,
"learning_rate": 2.0266813463997092e-05,
"loss": 0.7731,
"step": 1744
},
{
"epoch": 4.0,
"learning_rate": 2.0177209942785958e-05,
"loss": 0.5973,
"step": 1745
},
{
"epoch": 4.0,
"learning_rate": 2.008778270707944e-05,
"loss": 0.8096,
"step": 1746
}
],
"max_steps": 2180,
"num_train_epochs": 5,
"total_flos": 472941381419008.0,
"trial_name": null,
"trial_params": null
}