ZeroUniqueness
committed on
Commit
•
7963717
1
Parent(s):
f4c276d
Training in progress, step 6500
Browse files- adapter_model.bin +1 -1
- {checkpoint-6100 → checkpoint-6400/adapter_model}/README.md +0 -0
- {checkpoint-6100 → checkpoint-6400/adapter_model}/adapter_config.json +3 -3
- {checkpoint-6100 → checkpoint-6400/adapter_model}/adapter_model.bin +1 -1
- {checkpoint-6100/adapter_model → checkpoint-6500}/README.md +0 -0
- {checkpoint-6100/adapter_model → checkpoint-6500}/adapter_config.json +3 -3
- {checkpoint-6100/adapter_model → checkpoint-6500}/adapter_model.bin +1 -1
- {checkpoint-6100 → checkpoint-6500}/optimizer.pt +1 -1
- {checkpoint-6100 → checkpoint-6500}/rng_state_0.pth +1 -1
- {checkpoint-6100 → checkpoint-6500}/rng_state_1.pth +1 -1
- {checkpoint-6100 → checkpoint-6500}/rng_state_10.pth +1 -1
- {checkpoint-6100 → checkpoint-6500}/rng_state_11.pth +1 -1
- {checkpoint-6100 → checkpoint-6500}/rng_state_12.pth +1 -1
- {checkpoint-6100 → checkpoint-6500}/rng_state_13.pth +1 -1
- {checkpoint-6100 → checkpoint-6500}/rng_state_2.pth +1 -1
- {checkpoint-6100 → checkpoint-6500}/rng_state_3.pth +1 -1
- {checkpoint-6100 → checkpoint-6500}/rng_state_4.pth +1 -1
- {checkpoint-6100 → checkpoint-6500}/rng_state_5.pth +1 -1
- {checkpoint-6100 → checkpoint-6500}/rng_state_6.pth +1 -1
- {checkpoint-6100 → checkpoint-6500}/rng_state_7.pth +1 -1
- {checkpoint-6100 → checkpoint-6500}/rng_state_8.pth +1 -1
- {checkpoint-6100 → checkpoint-6500}/rng_state_9.pth +1 -1
- {checkpoint-6100 → checkpoint-6500}/scheduler.pt +1 -1
- {checkpoint-6100 → checkpoint-6500}/trainer_state.json +99 -3
- {checkpoint-6100 → checkpoint-6500}/training_args.bin +1 -1
adapter_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eca317a26dab233b92de0d6d401d39cb8b1fe17ca51e4a191e73b88cb76bfc3d
|
3 |
size 500897101
|
{checkpoint-6100 → checkpoint-6400/adapter_model}/README.md
RENAMED
File without changes
|
{checkpoint-6100 → checkpoint-6400/adapter_model}/adapter_config.json
RENAMED
@@ -14,12 +14,12 @@
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
17 |
-
"k_proj",
|
18 |
-
"down_proj",
|
19 |
-
"o_proj",
|
20 |
"up_proj",
|
21 |
"gate_proj",
|
|
|
22 |
"q_proj",
|
|
|
|
|
23 |
"v_proj"
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
|
|
|
|
|
|
17 |
"up_proj",
|
18 |
"gate_proj",
|
19 |
+
"down_proj",
|
20 |
"q_proj",
|
21 |
+
"k_proj",
|
22 |
+
"o_proj",
|
23 |
"v_proj"
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
{checkpoint-6100 → checkpoint-6400/adapter_model}/adapter_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90226543ac07d2ffe305a17c9351436408a800fb2798a5a37995d29cfa11cdec
|
3 |
size 500897101
|
{checkpoint-6100/adapter_model → checkpoint-6500}/README.md
RENAMED
File without changes
|
{checkpoint-6100/adapter_model → checkpoint-6500}/adapter_config.json
RENAMED
@@ -14,12 +14,12 @@
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
17 |
-
"k_proj",
|
18 |
-
"down_proj",
|
19 |
-
"o_proj",
|
20 |
"up_proj",
|
21 |
"gate_proj",
|
|
|
22 |
"q_proj",
|
|
|
|
|
23 |
"v_proj"
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
|
|
|
|
|
|
17 |
"up_proj",
|
18 |
"gate_proj",
|
19 |
+
"down_proj",
|
20 |
"q_proj",
|
21 |
+
"k_proj",
|
22 |
+
"o_proj",
|
23 |
"v_proj"
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
{checkpoint-6100/adapter_model → checkpoint-6500}/adapter_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eca317a26dab233b92de0d6d401d39cb8b1fe17ca51e4a191e73b88cb76bfc3d
|
3 |
size 500897101
|
{checkpoint-6100 → checkpoint-6500}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1001752701
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9682eef3ae7298917bb748df54df8acc39f0813218a1379d7f07289466ceb1f
|
3 |
size 1001752701
|
{checkpoint-6100 → checkpoint-6500}/rng_state_0.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dbf0d11080193db9fc57ef80bb7e4db0c2dfdb81c6c7d949934ecea2b60c19bf
|
3 |
size 27772
|
{checkpoint-6100 → checkpoint-6500}/rng_state_1.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:55ed8dac28e473953d59209a3d2e84f582e221e1aeec01bb889b689dd5d44e2d
|
3 |
size 27772
|
{checkpoint-6100 → checkpoint-6500}/rng_state_10.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ccf2c7c92e70421594d6f3f46563c37fcacbab1b490cdc37991945fbefd0f763
|
3 |
size 27789
|
{checkpoint-6100 → checkpoint-6500}/rng_state_11.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b57753b4305e0903d863e858d017bc2028a3f7434a2c1e604d81db6a5380681d
|
3 |
size 27789
|
{checkpoint-6100 → checkpoint-6500}/rng_state_12.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e3a73c535e411cb5175cf8662e504f0b075dc27417beaf98fb6764a53d08c703
|
3 |
size 27789
|
{checkpoint-6100 → checkpoint-6500}/rng_state_13.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c48a0a908e8938ff0202d305b73a93eef0a0f00a8215a6dc22758d9da73ca3d
|
3 |
size 27789
|
{checkpoint-6100 → checkpoint-6500}/rng_state_2.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25834dbb3472e6bffcb9ff8e2e1078b3d7285d899c32d530801c909e58792ca0
|
3 |
size 27772
|
{checkpoint-6100 → checkpoint-6500}/rng_state_3.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f2243c59fbd105663e2bc1078c5d294f026256157124a120dd6fc96481dde2a
|
3 |
size 27772
|
{checkpoint-6100 → checkpoint-6500}/rng_state_4.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:80e34c823c057e9fa9761cbcb6a98def6aedfc20291fc6e93777c2fd857362da
|
3 |
size 27772
|
{checkpoint-6100 → checkpoint-6500}/rng_state_5.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:49a370bb0d4117c8cf565138b86c7c35fdd0f6916cd6a676aff48559b5050a97
|
3 |
size 27772
|
{checkpoint-6100 → checkpoint-6500}/rng_state_6.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a142e199df177be102679c7c5299cd0ca74e71330003c3b0c22bf166a55fd738
|
3 |
size 27772
|
{checkpoint-6100 → checkpoint-6500}/rng_state_7.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fdd64c508edb14ee20f09cd5f53a1d6ae1b6ac188a5ce19c88b43bb1c51e651c
|
3 |
size 27772
|
{checkpoint-6100 → checkpoint-6500}/rng_state_8.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:117885c5fa979f628afc9a7003d4946141b0153b62ce3ffa6875857ffdd78ae8
|
3 |
size 27772
|
{checkpoint-6100 → checkpoint-6500}/rng_state_9.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9794ac48303e5da2f12696c803580c6cec30009df7c1a5151886c1bb7932c058
|
3 |
size 27772
|
{checkpoint-6100 → checkpoint-6500}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c2bdf46b70bc586fca89e010bc74dca67e64afb8ffe6379ceecc7b272fb7824
|
3 |
size 627
|
{checkpoint-6100 → checkpoint-6500}/trainer_state.json
RENAMED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 2.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1494,11 +1494,107 @@
|
|
1494 |
"learning_rate": 2.1342929523880416e-05,
|
1495 |
"loss": 0.7652,
|
1496 |
"step": 6100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1497 |
}
|
1498 |
],
|
1499 |
"max_steps": 7737,
|
1500 |
"num_train_epochs": 3,
|
1501 |
-
"total_flos": 2.
|
1502 |
"trial_name": null,
|
1503 |
"trial_params": null
|
1504 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.520356727413726,
|
5 |
+
"global_step": 6500,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1494 |
"learning_rate": 2.1342929523880416e-05,
|
1495 |
"loss": 0.7652,
|
1496 |
"step": 6100
|
1497 |
+
},
|
1498 |
+
{
|
1499 |
+
"epoch": 2.37,
|
1500 |
+
"learning_rate": 2.0719355560884246e-05,
|
1501 |
+
"loss": 0.765,
|
1502 |
+
"step": 6125
|
1503 |
+
},
|
1504 |
+
{
|
1505 |
+
"epoch": 2.38,
|
1506 |
+
"learning_rate": 2.010397230451766e-05,
|
1507 |
+
"loss": 0.7704,
|
1508 |
+
"step": 6150
|
1509 |
+
},
|
1510 |
+
{
|
1511 |
+
"epoch": 2.39,
|
1512 |
+
"learning_rate": 1.9496843331757784e-05,
|
1513 |
+
"loss": 0.767,
|
1514 |
+
"step": 6175
|
1515 |
+
},
|
1516 |
+
{
|
1517 |
+
"epoch": 2.4,
|
1518 |
+
"learning_rate": 1.8898031366808467e-05,
|
1519 |
+
"loss": 0.7654,
|
1520 |
+
"step": 6200
|
1521 |
+
},
|
1522 |
+
{
|
1523 |
+
"epoch": 2.41,
|
1524 |
+
"learning_rate": 1.830759827462004e-05,
|
1525 |
+
"loss": 0.7753,
|
1526 |
+
"step": 6225
|
1527 |
+
},
|
1528 |
+
{
|
1529 |
+
"epoch": 2.42,
|
1530 |
+
"learning_rate": 1.7725605054497906e-05,
|
1531 |
+
"loss": 0.7725,
|
1532 |
+
"step": 6250
|
1533 |
+
},
|
1534 |
+
{
|
1535 |
+
"epoch": 2.43,
|
1536 |
+
"learning_rate": 1.7152111833800522e-05,
|
1537 |
+
"loss": 0.7698,
|
1538 |
+
"step": 6275
|
1539 |
+
},
|
1540 |
+
{
|
1541 |
+
"epoch": 2.44,
|
1542 |
+
"learning_rate": 1.6587177861727454e-05,
|
1543 |
+
"loss": 0.7703,
|
1544 |
+
"step": 6300
|
1545 |
+
},
|
1546 |
+
{
|
1547 |
+
"epoch": 2.45,
|
1548 |
+
"learning_rate": 1.6030861503198204e-05,
|
1549 |
+
"loss": 0.7658,
|
1550 |
+
"step": 6325
|
1551 |
+
},
|
1552 |
+
{
|
1553 |
+
"epoch": 2.46,
|
1554 |
+
"learning_rate": 1.548322023282235e-05,
|
1555 |
+
"loss": 0.7677,
|
1556 |
+
"step": 6350
|
1557 |
+
},
|
1558 |
+
{
|
1559 |
+
"epoch": 2.47,
|
1560 |
+
"learning_rate": 1.4944310628961666e-05,
|
1561 |
+
"loss": 0.7694,
|
1562 |
+
"step": 6375
|
1563 |
+
},
|
1564 |
+
{
|
1565 |
+
"epoch": 2.48,
|
1566 |
+
"learning_rate": 1.4414188367884907e-05,
|
1567 |
+
"loss": 0.7668,
|
1568 |
+
"step": 6400
|
1569 |
+
},
|
1570 |
+
{
|
1571 |
+
"epoch": 2.49,
|
1572 |
+
"learning_rate": 1.3892908218015654e-05,
|
1573 |
+
"loss": 0.7662,
|
1574 |
+
"step": 6425
|
1575 |
+
},
|
1576 |
+
{
|
1577 |
+
"epoch": 2.5,
|
1578 |
+
"learning_rate": 1.3380524034274122e-05,
|
1579 |
+
"loss": 0.7689,
|
1580 |
+
"step": 6450
|
1581 |
+
},
|
1582 |
+
{
|
1583 |
+
"epoch": 2.51,
|
1584 |
+
"learning_rate": 1.2877088752513189e-05,
|
1585 |
+
"loss": 0.7694,
|
1586 |
+
"step": 6475
|
1587 |
+
},
|
1588 |
+
{
|
1589 |
+
"epoch": 2.52,
|
1590 |
+
"learning_rate": 1.2382654384049475e-05,
|
1591 |
+
"loss": 0.7714,
|
1592 |
+
"step": 6500
|
1593 |
}
|
1594 |
],
|
1595 |
"max_steps": 7737,
|
1596 |
"num_train_epochs": 3,
|
1597 |
+
"total_flos": 2.7994166247676707e+19,
|
1598 |
"trial_name": null,
|
1599 |
"trial_params": null
|
1600 |
}
|
{checkpoint-6100 → checkpoint-6500}/training_args.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4027
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e85009bca9623c846e630c294adb80ecbcd9e720da8da9f9ee5311b562908b91
|
3 |
size 4027
|