dat
committed on
Commit
•
0e50dbb
1
Parent(s):
bab4e66
Saving weights and logs of step 225000
Browse files- checkpoint_225000 +3 -0
- config.json +0 -2
- events.out.tfevents.1626397185.t1v-n-f5c06ea1-w-0.760449.3.v2 +2 -2
- flax_model.msgpack +1 -1
- wandb/run-20210716_005946-3bte92ft/files/output.log +362 -0
- wandb/run-20210716_005946-3bte92ft/files/wandb-summary.json +1 -1
- wandb/run-20210716_005946-3bte92ft/logs/debug-internal.log +0 -0
- wandb/run-20210716_005946-3bte92ft/run-3bte92ft.wandb +0 -0
checkpoint_225000
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95a241834d14e8ce32140cdf9151c8fdcaa7b79908747ded93e38f41d4ff7959
|
3 |
+
size 1530270447
|
config.json
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": ".",
|
3 |
"architectures": [
|
4 |
"BigBirdForMaskedLM"
|
5 |
],
|
@@ -24,7 +23,6 @@
|
|
24 |
"position_embedding_type": "absolute",
|
25 |
"rescale_embeddings": false,
|
26 |
"sep_token_id": 66,
|
27 |
-
"torch_dtype": "float32",
|
28 |
"transformers_version": "4.9.0.dev0",
|
29 |
"type_vocab_size": 2,
|
30 |
"use_bias": true,
|
|
|
1 |
{
|
|
|
2 |
"architectures": [
|
3 |
"BigBirdForMaskedLM"
|
4 |
],
|
|
|
23 |
"position_embedding_type": "absolute",
|
24 |
"rescale_embeddings": false,
|
25 |
"sep_token_id": 66,
|
|
|
26 |
"transformers_version": "4.9.0.dev0",
|
27 |
"type_vocab_size": 2,
|
28 |
"use_bias": true,
|
events.out.tfevents.1626397185.t1v-n-f5c06ea1-w-0.760449.3.v2
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:55333c246f74b2c84620675b6048997ae79474af6bfdd07693a18bed98c164cc
|
3 |
+
size 11281744
|
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 510090043
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e8f5ca7b60c5e4b03edfdde3f172ec2572f02f5753f13fe7a658c57c2fc9bae5
|
3 |
size 510090043
|
wandb/run-20210716_005946-3bte92ft/files/output.log
CHANGED
@@ -8246,3 +8246,365 @@ Training...: 71999it [8:23:42, 2.67it/s]█████████████
|
|
8246 |
|
8247 |
|
8248 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8246 |
|
8247 |
|
8248 |
|
8249 |
+
|
8250 |
+
|
8251 |
+
|
8252 |
+
|
8253 |
+
|
8254 |
+
|
8255 |
+
|
8256 |
+
|
8257 |
+
|
8258 |
+
|
8259 |
+
|
8260 |
+
|
8261 |
+
|
8262 |
+
|
8263 |
+
|
8264 |
+
|
8265 |
+
|
8266 |
+
Training...: 72049it [8:24:53, 2.68it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8267 |
+
|
8268 |
+
|
8269 |
+
|
8270 |
+
|
8271 |
+
Training...: 72099it [8:25:13, 2.72it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8272 |
+
|
8273 |
+
|
8274 |
+
|
8275 |
+
|
8276 |
+
Training...: 72149it [8:25:33, 2.67it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8277 |
+
|
8278 |
+
|
8279 |
+
|
8280 |
+
|
8281 |
+
Training...: 72199it [8:25:53, 2.71it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8282 |
+
|
8283 |
+
|
8284 |
+
|
8285 |
+
|
8286 |
+
Training...: 72249it [8:26:13, 2.68it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8287 |
+
|
8288 |
+
|
8289 |
+
|
8290 |
+
|
8291 |
+
Training...: 72299it [8:26:33, 2.69it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8292 |
+
|
8293 |
+
|
8294 |
+
|
8295 |
+
|
8296 |
+
|
8297 |
+
Training...: 72350it [8:27:06, 4.07s/it]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8298 |
+
|
8299 |
+
|
8300 |
+
|
8301 |
+
|
8302 |
+
|
8303 |
+
Training...: 72400it [8:27:26, 4.01s/it]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8304 |
+
|
8305 |
+
|
8306 |
+
|
8307 |
+
|
8308 |
+
Training...: 72449it [8:27:33, 2.72it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8309 |
+
|
8310 |
+
|
8311 |
+
|
8312 |
+
|
8313 |
+
Training...: 72499it [8:27:53, 2.68it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8314 |
+
|
8315 |
+
|
8316 |
+
|
8317 |
+
|
8318 |
+
|
8319 |
+
Training...: 72550it [8:28:27, 4.01s/it]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8320 |
+
|
8321 |
+
|
8322 |
+
|
8323 |
+
|
8324 |
+
|
8325 |
+
Training...: 72600it [8:28:47, 4.07s/it]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8326 |
+
|
8327 |
+
|
8328 |
+
|
8329 |
+
|
8330 |
+
|
8331 |
+
Training...: 72652it [8:29:07, 2.24s/it]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8332 |
+
|
8333 |
+
|
8334 |
+
|
8335 |
+
|
8336 |
+
Training...: 72699it [8:29:13, 2.76it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8337 |
+
|
8338 |
+
|
8339 |
+
|
8340 |
+
|
8341 |
+
Training...: 72749it [8:29:33, 2.69it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8342 |
+
|
8343 |
+
|
8344 |
+
|
8345 |
+
|
8346 |
+
|
8347 |
+
Training...: 72800it [8:30:07, 4.01s/it]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8348 |
+
|
8349 |
+
|
8350 |
+
|
8351 |
+
|
8352 |
+
|
8353 |
+
Training...: 72850it [8:30:27, 4.01s/it]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8354 |
+
|
8355 |
+
|
8356 |
+
|
8357 |
+
|
8358 |
+
|
8359 |
+
Training...: 72902it [8:30:47, 2.24s/it]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8360 |
+
|
8361 |
+
|
8362 |
+
|
8363 |
+
|
8364 |
+
Training...: 72949it [8:30:54, 2.71it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8365 |
+
|
8366 |
+
|
8367 |
+
|
8368 |
+
|
8369 |
+
Training...: 72999it [8:31:14, 2.68it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8370 |
+
|
8371 |
+
|
8372 |
+
|
8373 |
+
|
8374 |
+
|
8375 |
+
Training...: 73050it [8:31:47, 4.01s/it]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8376 |
+
|
8377 |
+
|
8378 |
+
|
8379 |
+
|
8380 |
+
|
8381 |
+
Training...: 73102it [8:32:07, 2.25s/it]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8382 |
+
|
8383 |
+
|
8384 |
+
|
8385 |
+
|
8386 |
+
|
8387 |
+
Training...: 73152it [8:32:27, 2.25s/it]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8388 |
+
|
8389 |
+
|
8390 |
+
|
8391 |
+
|
8392 |
+
Training...: 73199it [8:32:34, 2.76it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8393 |
+
|
8394 |
+
|
8395 |
+
|
8396 |
+
|
8397 |
+
Training...: 73249it [8:32:54, 2.69it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8398 |
+
|
8399 |
+
|
8400 |
+
|
8401 |
+
|
8402 |
+
Training...: 73299it [8:33:14, 2.70it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8403 |
+
|
8404 |
+
|
8405 |
+
|
8406 |
+
|
8407 |
+
|
8408 |
+
Training...: 73350it [8:33:47, 4.01s/it]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8409 |
+
|
8410 |
+
|
8411 |
+
|
8412 |
+
|
8413 |
+
|
8414 |
+
Training...: 73400it [8:34:07, 4.01s/it]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8415 |
+
|
8416 |
+
|
8417 |
+
|
8418 |
+
|
8419 |
+
|
8420 |
+
Training...: 73452it [8:34:27, 2.22s/it]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8421 |
+
|
8422 |
+
|
8423 |
+
|
8424 |
+
|
8425 |
+
|
8426 |
+
Training...: 73502it [8:34:47, 2.24s/it]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8427 |
+
|
8428 |
+
|
8429 |
+
|
8430 |
+
|
8431 |
+
|
8432 |
+
Training...: 73556it [8:35:08, 1.09it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8433 |
+
|
8434 |
+
|
8435 |
+
|
8436 |
+
|
8437 |
+
|
8438 |
+
Training...: 73606it [8:35:28, 1.09it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8439 |
+
|
8440 |
+
|
8441 |
+
|
8442 |
+
|
8443 |
+
|
8444 |
+
Training...: 73652it [8:35:48, 2.24s/it]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8445 |
+
|
8446 |
+
|
8447 |
+
|
8448 |
+
|
8449 |
+
|
8450 |
+
Training...: 73702it [8:36:08, 2.25s/it]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8451 |
+
|
8452 |
+
|
8453 |
+
|
8454 |
+
|
8455 |
+
|
8456 |
+
Training...: 73754it [8:36:28, 1.40s/it]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8457 |
+
|
8458 |
+
|
8459 |
+
|
8460 |
+
|
8461 |
+
|
8462 |
+
Training...: 73804it [8:36:48, 1.40s/it]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8463 |
+
|
8464 |
+
|
8465 |
+
|
8466 |
+
|
8467 |
+
|
8468 |
+
Training...: 73856it [8:37:08, 1.09it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8469 |
+
|
8470 |
+
|
8471 |
+
|
8472 |
+
|
8473 |
+
|
8474 |
+
Training...: 73908it [8:37:28, 1.59it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8475 |
+
|
8476 |
+
|
8477 |
+
|
8478 |
+
|
8479 |
+
|
8480 |
+
Training...: 73952it [8:37:48, 2.24s/it]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8481 |
+
|
8482 |
+
|
8483 |
+
|
8484 |
+
|
8485 |
+
|
8486 |
+
Training...: 74002it [8:38:08, 2.25s/it]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8487 |
+
|
8488 |
+
|
8489 |
+
|
8490 |
+
|
8491 |
+
|
8492 |
+
Training...: 74052it [8:38:28, 2.24s/it]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8493 |
+
|
8494 |
+
|
8495 |
+
|
8496 |
+
|
8497 |
+
|
8498 |
+
Training...: 74104it [8:38:48, 1.40s/it]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8499 |
+
|
8500 |
+
|
8501 |
+
|
8502 |
+
|
8503 |
+
|
8504 |
+
Training...: 74154it [8:39:08, 1.39s/it]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8505 |
+
|
8506 |
+
|
8507 |
+
|
8508 |
+
|
8509 |
+
|
8510 |
+
Training...: 74204it [8:39:28, 1.40s/it]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8511 |
+
|
8512 |
+
|
8513 |
+
|
8514 |
+
|
8515 |
+
Training...: 74249it [8:39:35, 2.69it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8516 |
+
|
8517 |
+
|
8518 |
+
|
8519 |
+
|
8520 |
+
|
8521 |
+
|
8522 |
+
Training...: 74308it [8:40:09, 1.59it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8523 |
+
|
8524 |
+
|
8525 |
+
|
8526 |
+
|
8527 |
+
|
8528 |
+
Training...: 74358it [8:40:29, 1.58it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8529 |
+
|
8530 |
+
|
8531 |
+
|
8532 |
+
|
8533 |
+
|
8534 |
+
Training...: 74410it [8:40:49, 2.25it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8535 |
+
|
8536 |
+
|
8537 |
+
|
8538 |
+
|
8539 |
+
|
8540 |
+
Training...: 74460it [8:41:09, 2.24it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8541 |
+
|
8542 |
+
|
8543 |
+
|
8544 |
+
|
8545 |
+
|
8546 |
+
Training...: 74506it [8:41:29, 1.04it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8547 |
+
|
8548 |
+
|
8549 |
+
|
8550 |
+
|
8551 |
+
|
8552 |
+
Training...: 74562it [8:41:49, 3.08it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8553 |
+
|
8554 |
+
|
8555 |
+
|
8556 |
+
Training...: 74599it [8:41:55, 2.68it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8557 |
+
|
8558 |
+
|
8559 |
+
|
8560 |
+
|
8561 |
+
|
8562 |
+
Training...: 74649it [8:42:15, 2.70it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8563 |
+
|
8564 |
+
|
8565 |
+
|
8566 |
+
|
8567 |
+
|
8568 |
+
|
8569 |
+
Training...: 74708it [8:42:49, 1.58it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8570 |
+
|
8571 |
+
|
8572 |
+
|
8573 |
+
|
8574 |
+
Training...: 74749it [8:42:55, 2.69it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8575 |
+
|
8576 |
+
|
8577 |
+
|
8578 |
+
|
8579 |
+
|
8580 |
+
|
8581 |
+
Training...: 74808it [8:43:29, 1.58it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8582 |
+
|
8583 |
+
|
8584 |
+
|
8585 |
+
|
8586 |
+
Training...: 74849it [8:43:36, 2.68it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8587 |
+
|
8588 |
+
|
8589 |
+
|
8590 |
+
|
8591 |
+
|
8592 |
+
Training...: 74899it [8:43:56, 2.73it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8593 |
+
|
8594 |
+
|
8595 |
+
|
8596 |
+
|
8597 |
+
|
8598 |
+
Training...: 74949it [8:44:16, 2.68it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8599 |
+
|
8600 |
+
|
8601 |
+
|
8602 |
+
|
8603 |
+
|
8604 |
+
Training...: 74999it [8:44:36, 2.69it/s]████████████| 500/500 [00:59<00:00, 7.90it/s]
|
8605 |
+
|
8606 |
+
|
8607 |
+
tcmalloc: large alloc 2715181056 bytes == 0x41e66a000 @ 0x7f779c64f680 0x7f779c66fbdd 0x7f7782c0920d 0x7f7782c17340 0x7f7782c16e87 0x7f7782c16e87 0x7f7782c16e87 0x7f7782c16e87 0x7f7782c16e87 0x7f7782c16e87 0x7f7782c16e87 0x7f7782c16e87 0x7f7782c16e87 0x7f7782c16e87 0x7f7782c16e87 0x7f7782c16e87 0x7f7782c12bd3 0x7f7782c131fe 0x504d56 0x56acb6 0x568d9a 0x5f5b33 0x56bc9b 0x5f5956 0x56aadf 0x5f5956 0x56fb87 0x568d9a 0x5f5b33 0x56bc9b 0x568d9a
|
8608 |
+
[09:50:33] - INFO - absl - Saved checkpoint at checkpoint_225000
|
8609 |
+
[09:50:34] - INFO - huggingface_hub.repository - git version 2.25.1
|
8610 |
+
git-lfs/2.9.2 (GitHub; linux amd64; go 1.13.5)
|
wandb/run-20210716_005946-3bte92ft/files/wandb-summary.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"training_step":
|
|
|
1 |
+
{"training_step": 225000, "learning_rate": 2.3847169359214604e-05, "train_loss": 2.061807155609131, "_runtime": 31840, "_timestamp": 1626429026, "_step": 1511, "eval_step": 222000, "eval_accuracy": 0.6153644919395447, "eval_loss": 1.9865816831588745}
|
wandb/run-20210716_005946-3bte92ft/logs/debug-internal.log
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
wandb/run-20210716_005946-3bte92ft/run-3bte92ft.wandb
CHANGED
Binary files a/wandb/run-20210716_005946-3bte92ft/run-3bte92ft.wandb and b/wandb/run-20210716_005946-3bte92ft/run-3bte92ft.wandb differ
|
|