dat committed on
Commit
0e50dbb
•
1 Parent(s): bab4e66

Saving weights and logs of step 225000

Browse files
checkpoint_225000 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95a241834d14e8ce32140cdf9151c8fdcaa7b79908747ded93e38f41d4ff7959
3
+ size 1530270447
config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "_name_or_path": ".",
3
  "architectures": [
4
  "BigBirdForMaskedLM"
5
  ],
@@ -24,7 +23,6 @@
24
  "position_embedding_type": "absolute",
25
  "rescale_embeddings": false,
26
  "sep_token_id": 66,
27
- "torch_dtype": "float32",
28
  "transformers_version": "4.9.0.dev0",
29
  "type_vocab_size": 2,
30
  "use_bias": true,
 
1
  {
 
2
  "architectures": [
3
  "BigBirdForMaskedLM"
4
  ],
 
23
  "position_embedding_type": "absolute",
24
  "rescale_embeddings": false,
25
  "sep_token_id": 66,
 
26
  "transformers_version": "4.9.0.dev0",
27
  "type_vocab_size": 2,
28
  "use_bias": true,
events.out.tfevents.1626397185.t1v-n-f5c06ea1-w-0.760449.3.v2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75d98147922b932dd6939ba021778b95c1ec799a8036630bffff4b749c5f07e9
3
- size 10830402
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55333c246f74b2c84620675b6048997ae79474af6bfdd07693a18bed98c164cc
3
+ size 11281744
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:baac2faaefa281dd0212650164a6b61ed52f9de5369418f02fe29cd52b3f08cd
3
  size 510090043
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8f5ca7b60c5e4b03edfdde3f172ec2572f02f5753f13fe7a658c57c2fc9bae5
3
  size 510090043
wandb/run-20210716_005946-3bte92ft/files/output.log CHANGED
@@ -8246,3 +8246,365 @@ Training...: 71999it [8:23:42, 2.67it/s]█████████████
8246
 
8247
 
8248
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8246
 
8247
 
8248
 
8249
+
8250
+
8251
+
8252
+
8253
+
8254
+
8255
+
8256
+
8257
+
8258
+
8259
+
8260
+
8261
+
8262
+
8263
+
8264
+
8265
+
8266
+ Training...: 72049it [8:24:53, 2.68it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8267
+
8268
+
8269
+
8270
+
8271
+ Training...: 72099it [8:25:13, 2.72it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8272
+
8273
+
8274
+
8275
+
8276
+ Training...: 72149it [8:25:33, 2.67it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8277
+
8278
+
8279
+
8280
+
8281
+ Training...: 72199it [8:25:53, 2.71it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8282
+
8283
+
8284
+
8285
+
8286
+ Training...: 72249it [8:26:13, 2.68it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8287
+
8288
+
8289
+
8290
+
8291
+ Training...: 72299it [8:26:33, 2.69it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8292
+
8293
+
8294
+
8295
+
8296
+
8297
+ Training...: 72350it [8:27:06, 4.07s/it]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8298
+
8299
+
8300
+
8301
+
8302
+
8303
+ Training...: 72400it [8:27:26, 4.01s/it]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8304
+
8305
+
8306
+
8307
+
8308
+ Training...: 72449it [8:27:33, 2.72it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8309
+
8310
+
8311
+
8312
+
8313
+ Training...: 72499it [8:27:53, 2.68it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8314
+
8315
+
8316
+
8317
+
8318
+
8319
+ Training...: 72550it [8:28:27, 4.01s/it]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8320
+
8321
+
8322
+
8323
+
8324
+
8325
+ Training...: 72600it [8:28:47, 4.07s/it]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8326
+
8327
+
8328
+
8329
+
8330
+
8331
+ Training...: 72652it [8:29:07, 2.24s/it]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8332
+
8333
+
8334
+
8335
+
8336
+ Training...: 72699it [8:29:13, 2.76it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8337
+
8338
+
8339
+
8340
+
8341
+ Training...: 72749it [8:29:33, 2.69it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8342
+
8343
+
8344
+
8345
+
8346
+
8347
+ Training...: 72800it [8:30:07, 4.01s/it]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8348
+
8349
+
8350
+
8351
+
8352
+
8353
+ Training...: 72850it [8:30:27, 4.01s/it]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8354
+
8355
+
8356
+
8357
+
8358
+
8359
+ Training...: 72902it [8:30:47, 2.24s/it]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8360
+
8361
+
8362
+
8363
+
8364
+ Training...: 72949it [8:30:54, 2.71it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8365
+
8366
+
8367
+
8368
+
8369
+ Training...: 72999it [8:31:14, 2.68it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8370
+
8371
+
8372
+
8373
+
8374
+
8375
+ Training...: 73050it [8:31:47, 4.01s/it]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8376
+
8377
+
8378
+
8379
+
8380
+
8381
+ Training...: 73102it [8:32:07, 2.25s/it]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8382
+
8383
+
8384
+
8385
+
8386
+
8387
+ Training...: 73152it [8:32:27, 2.25s/it]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8388
+
8389
+
8390
+
8391
+
8392
+ Training...: 73199it [8:32:34, 2.76it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8393
+
8394
+
8395
+
8396
+
8397
+ Training...: 73249it [8:32:54, 2.69it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8398
+
8399
+
8400
+
8401
+
8402
+ Training...: 73299it [8:33:14, 2.70it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8403
+
8404
+
8405
+
8406
+
8407
+
8408
+ Training...: 73350it [8:33:47, 4.01s/it]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8409
+
8410
+
8411
+
8412
+
8413
+
8414
+ Training...: 73400it [8:34:07, 4.01s/it]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8415
+
8416
+
8417
+
8418
+
8419
+
8420
+ Training...: 73452it [8:34:27, 2.22s/it]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8421
+
8422
+
8423
+
8424
+
8425
+
8426
+ Training...: 73502it [8:34:47, 2.24s/it]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8427
+
8428
+
8429
+
8430
+
8431
+
8432
+ Training...: 73556it [8:35:08, 1.09it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8433
+
8434
+
8435
+
8436
+
8437
+
8438
+ Training...: 73606it [8:35:28, 1.09it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8439
+
8440
+
8441
+
8442
+
8443
+
8444
+ Training...: 73652it [8:35:48, 2.24s/it]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8445
+
8446
+
8447
+
8448
+
8449
+
8450
+ Training...: 73702it [8:36:08, 2.25s/it]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8451
+
8452
+
8453
+
8454
+
8455
+
8456
+ Training...: 73754it [8:36:28, 1.40s/it]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8457
+
8458
+
8459
+
8460
+
8461
+
8462
+ Training...: 73804it [8:36:48, 1.40s/it]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8463
+
8464
+
8465
+
8466
+
8467
+
8468
+ Training...: 73856it [8:37:08, 1.09it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8469
+
8470
+
8471
+
8472
+
8473
+
8474
+ Training...: 73908it [8:37:28, 1.59it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8475
+
8476
+
8477
+
8478
+
8479
+
8480
+ Training...: 73952it [8:37:48, 2.24s/it]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8481
+
8482
+
8483
+
8484
+
8485
+
8486
+ Training...: 74002it [8:38:08, 2.25s/it]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8487
+
8488
+
8489
+
8490
+
8491
+
8492
+ Training...: 74052it [8:38:28, 2.24s/it]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8493
+
8494
+
8495
+
8496
+
8497
+
8498
+ Training...: 74104it [8:38:48, 1.40s/it]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8499
+
8500
+
8501
+
8502
+
8503
+
8504
+ Training...: 74154it [8:39:08, 1.39s/it]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8505
+
8506
+
8507
+
8508
+
8509
+
8510
+ Training...: 74204it [8:39:28, 1.40s/it]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8511
+
8512
+
8513
+
8514
+
8515
+ Training...: 74249it [8:39:35, 2.69it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8516
+
8517
+
8518
+
8519
+
8520
+
8521
+
8522
+ Training...: 74308it [8:40:09, 1.59it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8523
+
8524
+
8525
+
8526
+
8527
+
8528
+ Training...: 74358it [8:40:29, 1.58it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8529
+
8530
+
8531
+
8532
+
8533
+
8534
+ Training...: 74410it [8:40:49, 2.25it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8535
+
8536
+
8537
+
8538
+
8539
+
8540
+ Training...: 74460it [8:41:09, 2.24it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8541
+
8542
+
8543
+
8544
+
8545
+
8546
+ Training...: 74506it [8:41:29, 1.04it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8547
+
8548
+
8549
+
8550
+
8551
+
8552
+ Training...: 74562it [8:41:49, 3.08it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8553
+
8554
+
8555
+
8556
+ Training...: 74599it [8:41:55, 2.68it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8557
+
8558
+
8559
+
8560
+
8561
+
8562
+ Training...: 74649it [8:42:15, 2.70it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8563
+
8564
+
8565
+
8566
+
8567
+
8568
+
8569
+ Training...: 74708it [8:42:49, 1.58it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8570
+
8571
+
8572
+
8573
+
8574
+ Training...: 74749it [8:42:55, 2.69it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8575
+
8576
+
8577
+
8578
+
8579
+
8580
+
8581
+ Training...: 74808it [8:43:29, 1.58it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8582
+
8583
+
8584
+
8585
+
8586
+ Training...: 74849it [8:43:36, 2.68it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8587
+
8588
+
8589
+
8590
+
8591
+
8592
+ Training...: 74899it [8:43:56, 2.73it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8593
+
8594
+
8595
+
8596
+
8597
+
8598
+ Training...: 74949it [8:44:16, 2.68it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8599
+
8600
+
8601
+
8602
+
8603
+
8604
+ Training...: 74999it [8:44:36, 2.69it/s]β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 500/500 [00:59<00:00, 7.90it/s]
8605
+
8606
+
8607
+ tcmalloc: large alloc 2715181056 bytes == 0x41e66a000 @ 0x7f779c64f680 0x7f779c66fbdd 0x7f7782c0920d 0x7f7782c17340 0x7f7782c16e87 0x7f7782c16e87 0x7f7782c16e87 0x7f7782c16e87 0x7f7782c16e87 0x7f7782c16e87 0x7f7782c16e87 0x7f7782c16e87 0x7f7782c16e87 0x7f7782c16e87 0x7f7782c16e87 0x7f7782c16e87 0x7f7782c12bd3 0x7f7782c131fe 0x504d56 0x56acb6 0x568d9a 0x5f5b33 0x56bc9b 0x5f5956 0x56aadf 0x5f5956 0x56fb87 0x568d9a 0x5f5b33 0x56bc9b 0x568d9a
8608
+ [09:50:33] - INFO - absl - Saved checkpoint at checkpoint_225000
8609
+ [09:50:34] - INFO - huggingface_hub.repository - git version 2.25.1
8610
+ git-lfs/2.9.2 (GitHub; linux amd64; go 1.13.5)
wandb/run-20210716_005946-3bte92ft/files/wandb-summary.json CHANGED
@@ -1 +1 @@
1
- {"training_step": 222000, "learning_rate": 2.393302202108316e-05, "train_loss": 2.2153563499450684, "_runtime": 30573, "_timestamp": 1626427759, "_step": 1450, "eval_step": 216000, "eval_accuracy": 0.6142588257789612, "eval_loss": 1.9934282302856445}
 
1
+ {"training_step": 225000, "learning_rate": 2.3847169359214604e-05, "train_loss": 2.061807155609131, "_runtime": 31840, "_timestamp": 1626429026, "_step": 1511, "eval_step": 222000, "eval_accuracy": 0.6153644919395447, "eval_loss": 1.9865816831588745}
wandb/run-20210716_005946-3bte92ft/logs/debug-internal.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20210716_005946-3bte92ft/run-3bte92ft.wandb CHANGED
Binary files a/wandb/run-20210716_005946-3bte92ft/run-3bte92ft.wandb and b/wandb/run-20210716_005946-3bte92ft/run-3bte92ft.wandb differ