File size: 12,238 Bytes
08f9258 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 |
{
"best_metric": 0.6611545464384188,
"best_model_checkpoint": "../saved_model/tibetan-bert_tncc-document_v3/checkpoint-3234",
"epoch": 20.0,
"eval_steps": 500,
"global_step": 4620,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.6271739130434782,
"eval_loss": 1.15811288356781,
"eval_macro-f1": 0.5321491435605941,
"eval_macro-precision": 0.5499509034708364,
"eval_macro-recall": 0.5513303846525741,
"eval_runtime": 7.2664,
"eval_samples_per_second": 126.61,
"eval_steps_per_second": 3.991,
"eval_weighted-f1": 0.6286606311348418,
"eval_weighted-precision": 0.6570526714130747,
"eval_weighted-recall": 0.6271739130434782,
"step": 231
},
{
"epoch": 2.0,
"eval_accuracy": 0.6467391304347826,
"eval_loss": 1.068244218826294,
"eval_macro-f1": 0.5647362494157037,
"eval_macro-precision": 0.6183747770237591,
"eval_macro-recall": 0.5557985823725112,
"eval_runtime": 7.286,
"eval_samples_per_second": 126.269,
"eval_steps_per_second": 3.98,
"eval_weighted-f1": 0.6340133102826743,
"eval_weighted-precision": 0.661646599688849,
"eval_weighted-recall": 0.6467391304347826,
"step": 462
},
{
"epoch": 2.16,
"learning_rate": 4.458874458874459e-05,
"loss": 1.1735,
"step": 500
},
{
"epoch": 3.0,
"eval_accuracy": 0.6760869565217391,
"eval_loss": 0.9934693574905396,
"eval_macro-f1": 0.5945571067918397,
"eval_macro-precision": 0.6219488737703451,
"eval_macro-recall": 0.5845017075090547,
"eval_runtime": 7.2737,
"eval_samples_per_second": 126.483,
"eval_steps_per_second": 3.987,
"eval_weighted-f1": 0.6693675078151828,
"eval_weighted-precision": 0.6716730940106624,
"eval_weighted-recall": 0.6760869565217391,
"step": 693
},
{
"epoch": 4.0,
"eval_accuracy": 0.6760869565217391,
"eval_loss": 1.0614756345748901,
"eval_macro-f1": 0.6069076519660962,
"eval_macro-precision": 0.6513367891333143,
"eval_macro-recall": 0.5927988194005301,
"eval_runtime": 7.33,
"eval_samples_per_second": 125.511,
"eval_steps_per_second": 3.956,
"eval_weighted-f1": 0.668192819938365,
"eval_weighted-precision": 0.6913129775105903,
"eval_weighted-recall": 0.6760869565217391,
"step": 924
},
{
"epoch": 4.33,
"learning_rate": 3.917748917748918e-05,
"loss": 0.6662,
"step": 1000
},
{
"epoch": 5.0,
"eval_accuracy": 0.6608695652173913,
"eval_loss": 1.1701490879058838,
"eval_macro-f1": 0.594957375773476,
"eval_macro-precision": 0.6170952632020997,
"eval_macro-recall": 0.6011833102671116,
"eval_runtime": 7.2756,
"eval_samples_per_second": 126.45,
"eval_steps_per_second": 3.986,
"eval_weighted-f1": 0.6654696189795196,
"eval_weighted-precision": 0.6851333084994409,
"eval_weighted-recall": 0.6608695652173913,
"step": 1155
},
{
"epoch": 6.0,
"eval_accuracy": 0.6510869565217391,
"eval_loss": 1.292517066001892,
"eval_macro-f1": 0.607937784808065,
"eval_macro-precision": 0.630719857369284,
"eval_macro-recall": 0.6225829586625629,
"eval_runtime": 7.2833,
"eval_samples_per_second": 126.317,
"eval_steps_per_second": 3.982,
"eval_weighted-f1": 0.6579657588719497,
"eval_weighted-precision": 0.68649865311892,
"eval_weighted-recall": 0.6510869565217391,
"step": 1386
},
{
"epoch": 6.49,
"learning_rate": 3.376623376623377e-05,
"loss": 0.3247,
"step": 1500
},
{
"epoch": 7.0,
"eval_accuracy": 0.6695652173913044,
"eval_loss": 1.3797581195831299,
"eval_macro-f1": 0.6129092280782845,
"eval_macro-precision": 0.6387021571282967,
"eval_macro-recall": 0.609094564757661,
"eval_runtime": 7.2749,
"eval_samples_per_second": 126.461,
"eval_steps_per_second": 3.986,
"eval_weighted-f1": 0.6640159054651581,
"eval_weighted-precision": 0.6700483998986603,
"eval_weighted-recall": 0.6695652173913044,
"step": 1617
},
{
"epoch": 8.0,
"eval_accuracy": 0.6706521739130434,
"eval_loss": 1.4838347434997559,
"eval_macro-f1": 0.6061542359667785,
"eval_macro-precision": 0.611272244576803,
"eval_macro-recall": 0.6146650230118652,
"eval_runtime": 7.2825,
"eval_samples_per_second": 126.33,
"eval_steps_per_second": 3.982,
"eval_weighted-f1": 0.6720993101087696,
"eval_weighted-precision": 0.6820628069048843,
"eval_weighted-recall": 0.6706521739130434,
"step": 1848
},
{
"epoch": 8.66,
"learning_rate": 2.8354978354978357e-05,
"loss": 0.1507,
"step": 2000
},
{
"epoch": 9.0,
"eval_accuracy": 0.6880434782608695,
"eval_loss": 1.5808299779891968,
"eval_macro-f1": 0.6506642163845188,
"eval_macro-precision": 0.6540683372674448,
"eval_macro-recall": 0.6603270904104771,
"eval_runtime": 7.2595,
"eval_samples_per_second": 126.731,
"eval_steps_per_second": 3.995,
"eval_weighted-f1": 0.6915267384661025,
"eval_weighted-precision": 0.7075992309158492,
"eval_weighted-recall": 0.6880434782608695,
"step": 2079
},
{
"epoch": 10.0,
"eval_accuracy": 0.6717391304347826,
"eval_loss": 1.650195837020874,
"eval_macro-f1": 0.6034609083187376,
"eval_macro-precision": 0.6321546665662738,
"eval_macro-recall": 0.5917026419660609,
"eval_runtime": 7.2865,
"eval_samples_per_second": 126.261,
"eval_steps_per_second": 3.98,
"eval_weighted-f1": 0.6684456866047149,
"eval_weighted-precision": 0.6745276629705688,
"eval_weighted-recall": 0.6717391304347826,
"step": 2310
},
{
"epoch": 10.82,
"learning_rate": 2.2943722943722946e-05,
"loss": 0.0896,
"step": 2500
},
{
"epoch": 11.0,
"eval_accuracy": 0.6804347826086956,
"eval_loss": 1.738294005393982,
"eval_macro-f1": 0.6302114432029545,
"eval_macro-precision": 0.64659326690522,
"eval_macro-recall": 0.6353590685660309,
"eval_runtime": 7.3402,
"eval_samples_per_second": 125.337,
"eval_steps_per_second": 3.951,
"eval_weighted-f1": 0.6820100289567567,
"eval_weighted-precision": 0.6975783236550734,
"eval_weighted-recall": 0.6804347826086956,
"step": 2541
},
{
"epoch": 12.0,
"eval_accuracy": 0.6989130434782609,
"eval_loss": 1.7147595882415771,
"eval_macro-f1": 0.6515414811628367,
"eval_macro-precision": 0.6658442974988088,
"eval_macro-recall": 0.6496260625897462,
"eval_runtime": 7.2787,
"eval_samples_per_second": 126.396,
"eval_steps_per_second": 3.984,
"eval_weighted-f1": 0.6976782715450106,
"eval_weighted-precision": 0.7017023034717548,
"eval_weighted-recall": 0.6989130434782609,
"step": 2772
},
{
"epoch": 12.99,
"learning_rate": 1.7532467532467535e-05,
"loss": 0.0646,
"step": 3000
},
{
"epoch": 13.0,
"eval_accuracy": 0.6891304347826087,
"eval_loss": 1.7946357727050781,
"eval_macro-f1": 0.648332711071471,
"eval_macro-precision": 0.6479765490771864,
"eval_macro-recall": 0.6515217848664077,
"eval_runtime": 7.3066,
"eval_samples_per_second": 125.913,
"eval_steps_per_second": 3.969,
"eval_weighted-f1": 0.6915983518325557,
"eval_weighted-precision": 0.6986485129748002,
"eval_weighted-recall": 0.6891304347826087,
"step": 3003
},
{
"epoch": 14.0,
"eval_accuracy": 0.7,
"eval_loss": 1.7724699974060059,
"eval_macro-f1": 0.6611545464384188,
"eval_macro-precision": 0.667409141168159,
"eval_macro-recall": 0.6627804433172214,
"eval_runtime": 7.2494,
"eval_samples_per_second": 126.906,
"eval_steps_per_second": 4.0,
"eval_weighted-f1": 0.7033455944346818,
"eval_weighted-precision": 0.7140252489602517,
"eval_weighted-recall": 0.7,
"step": 3234
},
{
"epoch": 15.0,
"eval_accuracy": 0.6923913043478261,
"eval_loss": 1.819846510887146,
"eval_macro-f1": 0.6556012492821643,
"eval_macro-precision": 0.6602686382879858,
"eval_macro-recall": 0.6668664107682606,
"eval_runtime": 7.2775,
"eval_samples_per_second": 126.418,
"eval_steps_per_second": 3.985,
"eval_weighted-f1": 0.6965952163097968,
"eval_weighted-precision": 0.7083445248462037,
"eval_weighted-recall": 0.6923913043478261,
"step": 3465
},
{
"epoch": 15.15,
"learning_rate": 1.2121212121212122e-05,
"loss": 0.042,
"step": 3500
},
{
"epoch": 16.0,
"eval_accuracy": 0.6945652173913044,
"eval_loss": 1.784122109413147,
"eval_macro-f1": 0.6563585998978742,
"eval_macro-precision": 0.6655291218706761,
"eval_macro-recall": 0.6534120619783158,
"eval_runtime": 7.297,
"eval_samples_per_second": 126.08,
"eval_steps_per_second": 3.974,
"eval_weighted-f1": 0.6949462526633576,
"eval_weighted-precision": 0.6993999521302994,
"eval_weighted-recall": 0.6945652173913044,
"step": 3696
},
{
"epoch": 17.0,
"eval_accuracy": 0.6945652173913044,
"eval_loss": 1.7921020984649658,
"eval_macro-f1": 0.654978142271046,
"eval_macro-precision": 0.6614421486999998,
"eval_macro-recall": 0.657140253465508,
"eval_runtime": 7.359,
"eval_samples_per_second": 125.018,
"eval_steps_per_second": 3.941,
"eval_weighted-f1": 0.6972072799287201,
"eval_weighted-precision": 0.705999712282921,
"eval_weighted-recall": 0.6945652173913044,
"step": 3927
},
{
"epoch": 17.32,
"learning_rate": 6.709956709956711e-06,
"loss": 0.0314,
"step": 4000
},
{
"epoch": 18.0,
"eval_accuracy": 0.6945652173913044,
"eval_loss": 1.824020266532898,
"eval_macro-f1": 0.6548614235086001,
"eval_macro-precision": 0.6544996322588115,
"eval_macro-recall": 0.660921203092836,
"eval_runtime": 7.3304,
"eval_samples_per_second": 125.504,
"eval_steps_per_second": 3.956,
"eval_weighted-f1": 0.6960752624759597,
"eval_weighted-precision": 0.7018632204313372,
"eval_weighted-recall": 0.6945652173913044,
"step": 4158
},
{
"epoch": 19.0,
"eval_accuracy": 0.6923913043478261,
"eval_loss": 1.8412573337554932,
"eval_macro-f1": 0.6506506908222951,
"eval_macro-precision": 0.6468541951851238,
"eval_macro-recall": 0.6600299174294355,
"eval_runtime": 7.2351,
"eval_samples_per_second": 127.157,
"eval_steps_per_second": 4.008,
"eval_weighted-f1": 0.69621624934211,
"eval_weighted-precision": 0.7054368084525598,
"eval_weighted-recall": 0.6923913043478261,
"step": 4389
},
{
"epoch": 19.48,
"learning_rate": 1.2987012987012988e-06,
"loss": 0.0233,
"step": 4500
},
{
"epoch": 20.0,
"eval_accuracy": 0.691304347826087,
"eval_loss": 1.8325966596603394,
"eval_macro-f1": 0.6463398836192601,
"eval_macro-precision": 0.6403230244612891,
"eval_macro-recall": 0.6567581821874694,
"eval_runtime": 7.2741,
"eval_samples_per_second": 126.476,
"eval_steps_per_second": 3.987,
"eval_weighted-f1": 0.6940975925997279,
"eval_weighted-precision": 0.7008420722361637,
"eval_weighted-recall": 0.691304347826087,
"step": 4620
}
],
"logging_steps": 500,
"max_steps": 4620,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 3.87544755290112e+16,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}
|