|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9998001465591899, |
|
"eval_steps": 500, |
|
"global_step": 938, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.705882352941177e-06, |
|
"loss": 2.3894, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.411764705882354e-06, |
|
"loss": 2.4461, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.411764705882353e-05, |
|
"loss": 2.5984, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.8823529411764708e-05, |
|
"loss": 2.7012, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.3529411764705884e-05, |
|
"loss": 2.5558, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.823529411764706e-05, |
|
"loss": 2.7513, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.294117647058824e-05, |
|
"loss": 2.7527, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 3.7647058823529415e-05, |
|
"loss": 2.6276, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.235294117647059e-05, |
|
"loss": 2.6711, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.705882352941177e-05, |
|
"loss": 2.6975, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.176470588235295e-05, |
|
"loss": 2.5489, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5.647058823529412e-05, |
|
"loss": 2.5387, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6.11764705882353e-05, |
|
"loss": 2.5694, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 6.588235294117648e-05, |
|
"loss": 2.6244, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 7.058823529411765e-05, |
|
"loss": 2.5018, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 7.529411764705883e-05, |
|
"loss": 2.4123, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8e-05, |
|
"loss": 2.3234, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.470588235294118e-05, |
|
"loss": 2.0958, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 8.941176470588236e-05, |
|
"loss": 2.2023, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.411764705882353e-05, |
|
"loss": 2.1864, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.882352941176471e-05, |
|
"loss": 2.0768, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0001035294117647059, |
|
"loss": 2.189, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00010823529411764706, |
|
"loss": 1.9094, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00011294117647058824, |
|
"loss": 2.0136, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00011764705882352942, |
|
"loss": 1.7677, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001223529411764706, |
|
"loss": 2.3789, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00012705882352941175, |
|
"loss": 2.2419, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00013176470588235296, |
|
"loss": 2.2654, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00013647058823529413, |
|
"loss": 2.3797, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001411764705882353, |
|
"loss": 2.319, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00014588235294117646, |
|
"loss": 2.3527, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00015058823529411766, |
|
"loss": 2.129, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00015529411764705884, |
|
"loss": 2.2285, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00016, |
|
"loss": 2.2231, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001647058823529412, |
|
"loss": 2.0318, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00016941176470588237, |
|
"loss": 2.2135, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00017411764705882354, |
|
"loss": 2.093, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00017882352941176472, |
|
"loss": 2.0507, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001835294117647059, |
|
"loss": 2.115, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00018823529411764707, |
|
"loss": 2.1991, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019294117647058825, |
|
"loss": 2.1561, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019764705882352942, |
|
"loss": 2.1816, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019999993373829138, |
|
"loss": 1.9079, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019999940364514925, |
|
"loss": 2.1371, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019999834346167496, |
|
"loss": 2.0254, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019999675319348848, |
|
"loss": 1.9081, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001999946328490197, |
|
"loss": 1.9681, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001999919824395085, |
|
"loss": 2.1084, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019998880197900446, |
|
"loss": 1.8421, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019998509148436697, |
|
"loss": 2.0253, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001999808509752652, |
|
"loss": 2.2719, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019997608047417776, |
|
"loss": 2.3961, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001999707800063928, |
|
"loss": 2.2332, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019996494960000774, |
|
"loss": 2.3148, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019995858928592916, |
|
"loss": 2.2659, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019995169909787271, |
|
"loss": 2.3069, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0001999442790723628, |
|
"loss": 2.1792, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0001999363292487325, |
|
"loss": 2.2628, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019992784966912333, |
|
"loss": 2.3282, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019991884037848497, |
|
"loss": 2.286, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019990930142457515, |
|
"loss": 2.2392, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019989923285795914, |
|
"loss": 2.1468, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001998886347320098, |
|
"loss": 2.2453, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019987750710290713, |
|
"loss": 2.1149, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019986585002963793, |
|
"loss": 2.1051, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019985366357399564, |
|
"loss": 2.0007, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019984094780057978, |
|
"loss": 1.963, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019982770277679596, |
|
"loss": 1.9235, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00019981392857285505, |
|
"loss": 1.861, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001997996252617733, |
|
"loss": 2.1247, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00019978479291937165, |
|
"loss": 1.7524, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001997694316242753, |
|
"loss": 1.9346, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00019975354145791355, |
|
"loss": 1.6186, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00019973712250451908, |
|
"loss": 1.9757, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00019972017485112774, |
|
"loss": 1.9019, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00019970269858757787, |
|
"loss": 2.2553, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00019968469380651015, |
|
"loss": 2.2539, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019966616060336655, |
|
"loss": 2.173, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019964709907639057, |
|
"loss": 2.1496, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.000199627509326626, |
|
"loss": 2.2138, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019960739145791684, |
|
"loss": 2.2994, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019958674557690666, |
|
"loss": 2.2498, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00019956557179303788, |
|
"loss": 2.2241, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00019954387021855138, |
|
"loss": 2.257, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00019952164096848578, |
|
"loss": 2.1482, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00019949888416067688, |
|
"loss": 2.1853, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00019947559991575706, |
|
"loss": 2.1136, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00019945178835715458, |
|
"loss": 2.1496, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00019942744961109297, |
|
"loss": 2.0761, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0001994025838065903, |
|
"loss": 2.0665, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00019937719107545864, |
|
"loss": 2.1202, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00019935127155230314, |
|
"loss": 1.9078, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001993248253745216, |
|
"loss": 1.9247, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001992978526823034, |
|
"loss": 1.8159, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00019927035361862904, |
|
"loss": 1.7471, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001992423283292693, |
|
"loss": 1.8876, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00019921377696278437, |
|
"loss": 1.771, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00019918469967052327, |
|
"loss": 1.7633, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00019915509660662275, |
|
"loss": 1.7469, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00019912496792800677, |
|
"loss": 1.9769, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019909431379438544, |
|
"loss": 2.2334, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0001990631343682544, |
|
"loss": 2.226, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019903142981489373, |
|
"loss": 2.129, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0001989992003023672, |
|
"loss": 2.2408, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019896644600152135, |
|
"loss": 2.142, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001989331670859846, |
|
"loss": 2.3385, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019889936373216634, |
|
"loss": 2.2516, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001988650361192559, |
|
"loss": 2.2209, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019883018442922178, |
|
"loss": 2.1603, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001987948088468105, |
|
"loss": 2.2952, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019875890955954573, |
|
"loss": 2.2628, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019872248675772722, |
|
"loss": 2.2564, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0001986855406344299, |
|
"loss": 2.0969, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019864807138550273, |
|
"loss": 2.1252, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019861007920956786, |
|
"loss": 1.9448, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001985715643080192, |
|
"loss": 1.9488, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019853252688502187, |
|
"loss": 1.9668, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019849296714751063, |
|
"loss": 1.8091, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001984528853051891, |
|
"loss": 1.9742, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019841228157052853, |
|
"loss": 1.6913, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019837115615876664, |
|
"loss": 1.8882, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019832950928790657, |
|
"loss": 1.5621, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0001982873411787157, |
|
"loss": 1.6418, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0001982446520547244, |
|
"loss": 1.837, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00019820144214222497, |
|
"loss": 1.8345, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00019815771167027034, |
|
"loss": 2.2542, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00019811346087067287, |
|
"loss": 2.3098, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00019806868997800317, |
|
"loss": 2.1615, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0001980233992295889, |
|
"loss": 2.314, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00019797758886551324, |
|
"loss": 2.2309, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0001979312591286141, |
|
"loss": 2.1631, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00019788441026448225, |
|
"loss": 2.0791, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0001978370425214606, |
|
"loss": 2.2653, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001977891561506424, |
|
"loss": 2.2547, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00019774075140587024, |
|
"loss": 2.2326, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00019769182854373444, |
|
"loss": 2.0206, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.000197642387823572, |
|
"loss": 2.0665, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00019759242950746487, |
|
"loss": 2.1532, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0001975419538602389, |
|
"loss": 2.0988, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0001974909611494622, |
|
"loss": 1.937, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0001974394516454438, |
|
"loss": 1.9614, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00019738742562123225, |
|
"loss": 1.8518, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00019733488335261408, |
|
"loss": 1.7994, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00019728182511811245, |
|
"loss": 1.9275, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00019722825119898566, |
|
"loss": 1.9991, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0001971741618792255, |
|
"loss": 1.6737, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00019711955744555594, |
|
"loss": 1.4669, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0001970644381874316, |
|
"loss": 1.5002, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00019700880439703602, |
|
"loss": 1.9151, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00019695265636928032, |
|
"loss": 1.7493, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00019689599440180153, |
|
"loss": 2.2535, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00019683881879496107, |
|
"loss": 2.2597, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00019678112985184308, |
|
"loss": 2.3117, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00019672292787825292, |
|
"loss": 2.1624, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00019666421318271547, |
|
"loss": 2.1857, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0001966049860764735, |
|
"loss": 2.2101, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00019654524687348607, |
|
"loss": 2.1749, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019648499589042676, |
|
"loss": 2.1557, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019642423344668218, |
|
"loss": 2.2259, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019636295986435003, |
|
"loss": 2.1723, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019630117546823759, |
|
"loss": 2.294, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019623888058585993, |
|
"loss": 2.2241, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019617607554743818, |
|
"loss": 2.1496, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019611276068589776, |
|
"loss": 2.1116, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019604893633686662, |
|
"loss": 2.1723, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001959846028386735, |
|
"loss": 2.0301, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019591976053234608, |
|
"loss": 1.8651, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001958544097616092, |
|
"loss": 1.8286, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019578855087288302, |
|
"loss": 1.9081, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001957221842152813, |
|
"loss": 1.8546, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001956553101406093, |
|
"loss": 1.7914, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019558792900336216, |
|
"loss": 1.7997, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019552004116072294, |
|
"loss": 1.3596, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001954516469725606, |
|
"loss": 1.6549, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019538274680142834, |
|
"loss": 1.6592, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00019531334101256147, |
|
"loss": 2.2759, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00019524342997387557, |
|
"loss": 2.1805, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0001951730140559645, |
|
"loss": 2.2585, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00019510209363209847, |
|
"loss": 2.2573, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00019503066907822198, |
|
"loss": 2.3154, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.000194958740772952, |
|
"loss": 2.2306, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019488630909757579, |
|
"loss": 2.3178, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019481337443604893, |
|
"loss": 2.2691, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0001947399371749933, |
|
"loss": 2.153, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019466599770369509, |
|
"loss": 2.0471, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00019459155641410257, |
|
"loss": 2.3142, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00019451661370082426, |
|
"loss": 2.1241, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001944411699611265, |
|
"loss": 2.1079, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001943652255949317, |
|
"loss": 2.063, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019428878100481606, |
|
"loss": 2.0062, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019421183659600725, |
|
"loss": 1.9, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019413439277638265, |
|
"loss": 1.9723, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019405644995646696, |
|
"loss": 1.969, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019397800854942986, |
|
"loss": 2.0202, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019389906897108428, |
|
"loss": 1.9028, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0001938196316398837, |
|
"loss": 1.8172, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019373969697692028, |
|
"loss": 1.7243, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019365926540592247, |
|
"loss": 1.4621, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0001935783373532528, |
|
"loss": 1.7987, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019349691324790555, |
|
"loss": 1.7935, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0001934149935215047, |
|
"loss": 2.2858, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019333257860830135, |
|
"loss": 2.3798, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019324966894517155, |
|
"loss": 2.1534, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019316626497161408, |
|
"loss": 2.2648, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00019308236712974795, |
|
"loss": 2.2946, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001929979758643102, |
|
"loss": 2.319, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00019291309162265338, |
|
"loss": 2.2271, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001928277148547434, |
|
"loss": 2.0746, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00019274184601315687, |
|
"loss": 2.1231, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.000192655485553079, |
|
"loss": 2.3176, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00019256863393230094, |
|
"loss": 2.1657, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00019248129161121748, |
|
"loss": 2.2696, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0001923934590528246, |
|
"loss": 2.2132, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00019230513672271698, |
|
"loss": 2.104, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001922163250890855, |
|
"loss": 1.9319, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001921270246227149, |
|
"loss": 1.9152, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00019203723579698108, |
|
"loss": 1.8615, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00019194695908784882, |
|
"loss": 2.0695, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0001918561949738691, |
|
"loss": 1.8626, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0001917649439361765, |
|
"loss": 1.7538, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00019167320645848695, |
|
"loss": 2.0082, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00019158098302709476, |
|
"loss": 1.6011, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00019148827413087034, |
|
"loss": 1.5286, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00019139508026125754, |
|
"loss": 1.8795, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.000191301401912271, |
|
"loss": 1.7509, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00019120723958049353, |
|
"loss": 2.3155, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0001911125937650736, |
|
"loss": 2.2125, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00019101746496772242, |
|
"loss": 2.2262, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001909218536927116, |
|
"loss": 2.3185, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001908257604468703, |
|
"loss": 2.2496, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00019072918573958254, |
|
"loss": 2.3569, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001906321300827846, |
|
"loss": 2.1697, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00019053459399096215, |
|
"loss": 2.197, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00019043657798114766, |
|
"loss": 1.9929, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00019033808257291768, |
|
"loss": 2.1488, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001902391082883899, |
|
"loss": 2.2224, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00019013965565222062, |
|
"loss": 2.0495, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00019003972519160178, |
|
"loss": 2.0964, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0001899393174362582, |
|
"loss": 1.8708, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00018983843291844492, |
|
"loss": 1.9741, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00018973707217294415, |
|
"loss": 1.9908, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00018963523573706264, |
|
"loss": 1.7973, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0001895329241506287, |
|
"loss": 1.9293, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00018943013795598944, |
|
"loss": 1.94, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00018932687769800767, |
|
"loss": 1.9435, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00018922314392405944, |
|
"loss": 1.7907, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00018911893718403063, |
|
"loss": 1.6344, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00018901425803031447, |
|
"loss": 1.3563, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00018890910701780826, |
|
"loss": 1.7, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00018880348470391077, |
|
"loss": 1.7682, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00018869739164851889, |
|
"loss": 2.2041, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00018859082841402513, |
|
"loss": 2.237, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001884837955653142, |
|
"loss": 2.1694, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00018837629366976025, |
|
"loss": 2.2373, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001882683232972239, |
|
"loss": 2.2511, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.000188159885020049, |
|
"loss": 2.2474, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00018805097941305984, |
|
"loss": 2.1938, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00018794160705355796, |
|
"loss": 2.0874, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00018783176852131908, |
|
"loss": 2.2795, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00018772146439859015, |
|
"loss": 2.179, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00018761069527008613, |
|
"loss": 2.0959, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00018749946172298698, |
|
"loss": 2.3345, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00018738776434693447, |
|
"loss": 2.0522, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00018727560373402917, |
|
"loss": 2.1055, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00018716298047882714, |
|
"loss": 2.0651, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00018704989517833695, |
|
"loss": 2.0961, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0001869363484320164, |
|
"loss": 1.9879, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00018682234084176945, |
|
"loss": 2.0118, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0001867078730119429, |
|
"loss": 1.9059, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00018659294554932324, |
|
"loss": 1.8227, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00018647755906313348, |
|
"loss": 1.8794, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0001863617141650299, |
|
"loss": 1.6243, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00018624541146909873, |
|
"loss": 1.6429, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00018612865159185304, |
|
"loss": 2.03, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001860114351522293, |
|
"loss": 1.7897, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00018589376277158425, |
|
"loss": 2.2226, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00018577563507369153, |
|
"loss": 2.2998, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00018565705268473837, |
|
"loss": 2.1385, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001855380162333223, |
|
"loss": 2.1793, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0001854185263504478, |
|
"loss": 2.2456, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00018529858366952298, |
|
"loss": 2.162, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00018517818882635617, |
|
"loss": 2.1046, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0001850573424591526, |
|
"loss": 2.1761, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00018493604520851097, |
|
"loss": 2.2593, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00018481429771742018, |
|
"loss": 2.2067, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00018469210063125572, |
|
"loss": 2.1257, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00018456945459777643, |
|
"loss": 2.2823, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0001844463602671209, |
|
"loss": 2.2942, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0001843228182918042, |
|
"loss": 2.1126, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0001841988293267143, |
|
"loss": 2.2014, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00018407439402910858, |
|
"loss": 2.0244, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00018394951305861055, |
|
"loss": 1.9842, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00018382418707720604, |
|
"loss": 1.8637, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00018369841674923998, |
|
"loss": 1.8229, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00018357220274141262, |
|
"loss": 1.8023, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00018344554572277628, |
|
"loss": 1.6493, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00018331844636473152, |
|
"loss": 1.4272, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00018319090534102381, |
|
"loss": 1.5226, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0001830629233277398, |
|
"loss": 1.5868, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00018293450100330375, |
|
"loss": 1.6859, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00018280563904847415, |
|
"loss": 2.2781, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0001826763381463398, |
|
"loss": 2.2743, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0001825465989823164, |
|
"loss": 2.2974, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00018241642224414272, |
|
"loss": 2.089, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00018228580862187727, |
|
"loss": 2.3559, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00018215475880789433, |
|
"loss": 2.2152, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00018202327349688043, |
|
"loss": 2.1726, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00018189135338583066, |
|
"loss": 2.2242, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00018175899917404492, |
|
"loss": 2.2506, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00018162621156312433, |
|
"loss": 2.1324, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00018149299125696735, |
|
"loss": 2.006, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00018135933896176612, |
|
"loss": 2.2178, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00018122525538600282, |
|
"loss": 2.0817, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00018109074124044572, |
|
"loss": 2.0006, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00018095579723814557, |
|
"loss": 1.9424, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00018082042409443174, |
|
"loss": 1.9661, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00018068462252690843, |
|
"loss": 1.8678, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00018054839325545096, |
|
"loss": 1.8343, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0001804117370022018, |
|
"loss": 1.8003, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0001802746544915669, |
|
"loss": 1.7928, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00018013714645021166, |
|
"loss": 1.8922, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00017999921360705733, |
|
"loss": 1.6961, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0001798608566932769, |
|
"loss": 1.3043, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00017972207644229138, |
|
"loss": 1.8295, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0001795828735897658, |
|
"loss": 1.7105, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00017944324887360553, |
|
"loss": 2.1881, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.000179303203033952, |
|
"loss": 2.2434, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0001791627368131792, |
|
"loss": 2.3566, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00017902185095588927, |
|
"loss": 2.3222, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00017888054620890915, |
|
"loss": 2.181, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00017873882332128597, |
|
"loss": 2.3261, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00017859668304428365, |
|
"loss": 2.2798, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00017845412613137844, |
|
"loss": 2.0487, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00017831115333825535, |
|
"loss": 2.1863, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00017816776542280377, |
|
"loss": 2.1308, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0001780239631451138, |
|
"loss": 2.3122, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0001778797472674719, |
|
"loss": 2.2757, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00017773511855435708, |
|
"loss": 2.0241, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00017759007777243672, |
|
"loss": 1.9896, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00017744462569056256, |
|
"loss": 1.9607, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00017729876307976663, |
|
"loss": 1.9798, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00017715249071325717, |
|
"loss": 1.9075, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00017700580936641443, |
|
"loss": 2.0141, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00017685871981678672, |
|
"loss": 1.9238, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00017671122284408614, |
|
"loss": 1.9244, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00017656331923018457, |
|
"loss": 1.6621, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00017641500975910945, |
|
"loss": 1.7402, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0001762662952170396, |
|
"loss": 1.3913, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0001761171763923012, |
|
"loss": 1.5825, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0001759676540753634, |
|
"loss": 1.6809, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00017581772905883423, |
|
"loss": 2.3459, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00017566740213745648, |
|
"loss": 2.1963, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00017551667410810337, |
|
"loss": 2.1334, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00017536554576977442, |
|
"loss": 2.3778, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00017521401792359108, |
|
"loss": 2.183, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0001750620913727926, |
|
"loss": 2.1674, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00017490976692273176, |
|
"loss": 2.3534, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00017475704538087055, |
|
"loss": 2.1677, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00017460392755677592, |
|
"loss": 2.1642, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0001744504142621155, |
|
"loss": 2.1983, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0001742965063106533, |
|
"loss": 2.1661, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0001741422045182453, |
|
"loss": 2.2135, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00017398750970283532, |
|
"loss": 2.1288, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00017383242268445047, |
|
"loss": 1.9906, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00017367694428519696, |
|
"loss": 1.9031, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00017352107532925569, |
|
"loss": 1.831, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00017336481664287777, |
|
"loss": 1.9116, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00017320816905438044, |
|
"loss": 1.8241, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0001730511333941423, |
|
"loss": 1.8745, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00017289371049459922, |
|
"loss": 1.8306, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00017273590119023968, |
|
"loss": 1.6032, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00017257770631760058, |
|
"loss": 1.3571, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00017241912671526265, |
|
"loss": 1.6282, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00017226016322384604, |
|
"loss": 1.5942, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00017210081668600586, |
|
"loss": 1.6988, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00017194108794642775, |
|
"loss": 2.334, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00017178097785182337, |
|
"loss": 2.2731, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0001716204872509259, |
|
"loss": 2.1642, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00017145961699448559, |
|
"loss": 2.4107, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00017129836793526517, |
|
"loss": 2.2767, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00017113674092803543, |
|
"loss": 2.3137, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00017097473682957067, |
|
"loss": 2.3095, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00017081235649864395, |
|
"loss": 2.1327, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00017064960079602297, |
|
"loss": 2.2666, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00017048647058446505, |
|
"loss": 2.2871, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00017032296672871283, |
|
"loss": 2.2055, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00017015909009548966, |
|
"loss": 2.1066, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00016999484155349483, |
|
"loss": 2.1887, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00016983022197339923, |
|
"loss": 2.1014, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00016966523222784058, |
|
"loss": 2.1138, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00016949987319141868, |
|
"loss": 1.8663, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0001693341457406911, |
|
"loss": 1.7421, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00016916805075416823, |
|
"loss": 1.8293, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0001690015891123088, |
|
"loss": 1.8993, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00016883476169751518, |
|
"loss": 1.8486, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0001686675693941286, |
|
"loss": 1.7033, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00016850001308842458, |
|
"loss": 1.5906, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00016833209366860826, |
|
"loss": 1.4477, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00016816381202480946, |
|
"loss": 1.8339, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00016799516904907828, |
|
"loss": 1.6691, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00016782616563538016, |
|
"loss": 2.4258, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0001676568026795912, |
|
"loss": 2.3445, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00016748708107949337, |
|
"loss": 2.137, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00016731700173476988, |
|
"loss": 2.2656, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00016714656554700022, |
|
"loss": 2.2945, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0001669757734196556, |
|
"loss": 2.0641, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0001668046262580939, |
|
"loss": 2.0698, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00016663312496955517, |
|
"loss": 2.1787, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00016646127046315653, |
|
"loss": 2.0295, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0001662890636498875, |
|
"loss": 2.2736, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00016611650544260526, |
|
"loss": 2.1205, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00016594359675602962, |
|
"loss": 2.1249, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00016577033850673824, |
|
"loss": 1.9355, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00016559673161316188, |
|
"loss": 1.8423, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00016542277699557934, |
|
"loss": 2.0979, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00016524847557611278, |
|
"loss": 1.7747, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00016507382827872264, |
|
"loss": 1.7137, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0001648988360292029, |
|
"loss": 1.9926, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0001647234997551761, |
|
"loss": 2.033, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00016454782038608835, |
|
"loss": 1.7298, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00016437179885320466, |
|
"loss": 1.8326, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00016419543608960367, |
|
"loss": 1.5744, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00016401873303017287, |
|
"loss": 1.471, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00016384169061160376, |
|
"loss": 1.7169, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00016366430977238667, |
|
"loss": 1.5636, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00016348659145280585, |
|
"loss": 2.3071, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00016330853659493456, |
|
"loss": 2.1993, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00016313014614263003, |
|
"loss": 2.1583, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0001629514210415284, |
|
"loss": 2.1996, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00016277236223903986, |
|
"loss": 2.309, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00016259297068434343, |
|
"loss": 2.2155, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0001624132473283821, |
|
"loss": 2.2474, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00016223319312385766, |
|
"loss": 2.2034, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00016205280902522576, |
|
"loss": 2.2016, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00016187209598869074, |
|
"loss": 2.133, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00016169105497220064, |
|
"loss": 2.1746, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00016150968693544215, |
|
"loss": 2.0806, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00016132799283983542, |
|
"loss": 2.2318, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.000161145973648529, |
|
"loss": 2.0729, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0001609636303263948, |
|
"loss": 1.8199, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00016078096384002292, |
|
"loss": 2.0973, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00016059797515771652, |
|
"loss": 1.7302, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00016041466524948663, |
|
"loss": 1.9608, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00016023103508704725, |
|
"loss": 1.8483, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00016004708564380985, |
|
"loss": 1.9501, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0001598628178948785, |
|
"loss": 1.6526, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0001596782328170445, |
|
"loss": 1.4811, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00015949333138878138, |
|
"loss": 1.4048, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00015930811459023957, |
|
"loss": 1.5489, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00015912258340324126, |
|
"loss": 1.7197, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00015893673881127524, |
|
"loss": 2.1217, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00015875058179949151, |
|
"loss": 2.2086, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00015856411335469638, |
|
"loss": 2.094, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00015837733446534688, |
|
"loss": 2.3415, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00015819024612154575, |
|
"loss": 2.2378, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00015800284931503618, |
|
"loss": 2.1351, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0001578151450391964, |
|
"loss": 2.1795, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00015762713428903454, |
|
"loss": 2.0282, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00015743881806118342, |
|
"loss": 2.2858, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00015725019735389503, |
|
"loss": 2.0929, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00015706127316703557, |
|
"loss": 1.9913, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0001568720465020798, |
|
"loss": 2.0128, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00015668251836210595, |
|
"loss": 1.8792, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0001564926897517904, |
|
"loss": 1.6978, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0001563025616774022, |
|
"loss": 2.0335, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0001561121351467979, |
|
"loss": 1.845, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00015592141116941628, |
|
"loss": 1.3927, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00015573039075627256, |
|
"loss": 1.6165, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00015553907491995365, |
|
"loss": 1.6737, |
|
"step": 938 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 2814, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 1.51669593729024e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|