ZeroUniqueness
committed on
Commit
•
79fef62
1
Parent(s):
ffa4692
Training in progress, step 6900
Browse files
- adapter_config.json +4 -4
- adapter_model.bin +1 -1
- {checkpoint-6500 → checkpoint-6800/adapter_model}/README.md +0 -0
- {checkpoint-6500 → checkpoint-6800/adapter_model}/adapter_config.json +0 -0
- {checkpoint-6500 → checkpoint-6800/adapter_model}/adapter_model.bin +1 -1
- {checkpoint-6500/adapter_model → checkpoint-6900}/README.md +0 -0
- {checkpoint-6500/adapter_model → checkpoint-6900}/adapter_config.json +4 -4
- {checkpoint-6500/adapter_model → checkpoint-6900}/adapter_model.bin +1 -1
- {checkpoint-6500 → checkpoint-6900}/optimizer.pt +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_0.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_1.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_10.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_11.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_12.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_13.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_2.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_3.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_4.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_5.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_6.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_7.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_8.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/rng_state_9.pth +1 -1
- {checkpoint-6500 → checkpoint-6900}/scheduler.pt +1 -1
- {checkpoint-6500 → checkpoint-6900}/trainer_state.json +99 -3
- {checkpoint-6500 → checkpoint-6900}/training_args.bin +1 -1
- training_args.bin +1 -1
adapter_config.json
CHANGED
@@ -14,13 +14,13 @@
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
17 |
-
"up_proj",
|
18 |
-
"gate_proj",
|
19 |
"down_proj",
|
|
|
20 |
"q_proj",
|
21 |
"k_proj",
|
22 |
-
"o_proj",
|
23 |
-
"v_proj"
|
|
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
|
|
|
|
17 |
"down_proj",
|
18 |
+
"up_proj",
|
19 |
"q_proj",
|
20 |
"k_proj",
|
21 |
+
"gate_proj",
|
22 |
+
"v_proj",
|
23 |
+
"o_proj"
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
adapter_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6dce28954f69ce7f466b50fc774c94fb6da869b76f94baf208c3e276b1365db2
|
3 |
size 500897101
|
{checkpoint-6500 → checkpoint-6800/adapter_model}/README.md
RENAMED
File without changes
|
{checkpoint-6500 → checkpoint-6800/adapter_model}/adapter_config.json
RENAMED
File without changes
|
{checkpoint-6500 → checkpoint-6800/adapter_model}/adapter_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:caee01e1da9f7e92223499414f8bb25b28ea34daf6fb927c9bd869dcba2559fd
|
3 |
size 500897101
|
{checkpoint-6500/adapter_model → checkpoint-6900}/README.md
RENAMED
File without changes
|
{checkpoint-6500/adapter_model → checkpoint-6900}/adapter_config.json
RENAMED
@@ -14,13 +14,13 @@
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
17 |
-
"up_proj",
|
18 |
-
"gate_proj",
|
19 |
"down_proj",
|
|
|
20 |
"q_proj",
|
21 |
"k_proj",
|
22 |
-
"o_proj",
|
23 |
-
"v_proj"
|
|
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
|
|
|
|
17 |
"down_proj",
|
18 |
+
"up_proj",
|
19 |
"q_proj",
|
20 |
"k_proj",
|
21 |
+
"gate_proj",
|
22 |
+
"v_proj",
|
23 |
+
"o_proj"
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
{checkpoint-6500/adapter_model → checkpoint-6900}/adapter_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6dce28954f69ce7f466b50fc774c94fb6da869b76f94baf208c3e276b1365db2
|
3 |
size 500897101
|
{checkpoint-6500 → checkpoint-6900}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1001752701
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e51694cb0a1cd9603ece0232dabcd7a370a6f8e378f0b01213ceaaf413c4d0c
|
3 |
size 1001752701
|
{checkpoint-6500 → checkpoint-6900}/rng_state_0.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:361a0cbcd4b75d795f0109f290c642aca880cc06772f00227ba1e0217fc939ef
|
3 |
size 27772
|
{checkpoint-6500 → checkpoint-6900}/rng_state_1.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f5123ae610aa26457a3fa34fd70ab7e4ab44cb21dc09fd095aad55a2d15294a
|
3 |
size 27772
|
{checkpoint-6500 → checkpoint-6900}/rng_state_10.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a213c64ebe2689a3f02bbe5b0ee2c66feb57c0f5f4f8621c0ec2f63c6c590892
|
3 |
size 27789
|
{checkpoint-6500 → checkpoint-6900}/rng_state_11.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:531d6f0912536de9c8ce68b83c17fc005458796d7921c1fe083548fda04d57a7
|
3 |
size 27789
|
{checkpoint-6500 → checkpoint-6900}/rng_state_12.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dad9e94de76a75ad267b7650de4df602944013ed9b553b148fa94de6ea86bb52
|
3 |
size 27789
|
{checkpoint-6500 → checkpoint-6900}/rng_state_13.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f9e0a77144d5a662958f707fc8b494554054fd279f5a3dc74159dc97e6d1e78
|
3 |
size 27789
|
{checkpoint-6500 → checkpoint-6900}/rng_state_2.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5b931b3c81ed25f1fcd0971b6da08a3869bfd28a0489528ed3c96c3619fa557
|
3 |
size 27772
|
{checkpoint-6500 → checkpoint-6900}/rng_state_3.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2e121c889141260915892ddfe435284a35a9f246ef67720bdc33081a846ab328
|
3 |
size 27772
|
{checkpoint-6500 → checkpoint-6900}/rng_state_4.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f912038cfe5be27104676eb3640c3beb52c1c4c80bafea1732f0443a67ad4b4
|
3 |
size 27772
|
{checkpoint-6500 → checkpoint-6900}/rng_state_5.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e7e05a90589de96c2e2748fa928890eb3ae0f6585d2b9768316f612aa8cf908
|
3 |
size 27772
|
{checkpoint-6500 → checkpoint-6900}/rng_state_6.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dfaec3c28cf52d29cdacae2acf43363d9aea5a881877b06a0055a14c1b7aed6a
|
3 |
size 27772
|
{checkpoint-6500 → checkpoint-6900}/rng_state_7.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c35c98f247077b0ff5a006aeba6561c7e6bf2e9fd8969c9bc35d64aaf01cf14a
|
3 |
size 27772
|
{checkpoint-6500 → checkpoint-6900}/rng_state_8.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86a908ac53361e9d5c34680f8e1789e69824f75a3e0b100140c8133e52f8fbed
|
3 |
size 27772
|
{checkpoint-6500 → checkpoint-6900}/rng_state_9.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5597a151c26d0f9f0ba329a3cf67af7f95b7fed3d58ea4461f586948ea11f890
|
3 |
size 27772
|
{checkpoint-6500 → checkpoint-6900}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed903a2193f395f38212602a53ab2a5c251266f0ae7d7dc0557e3b565ed8e240
|
3 |
size 627
|
{checkpoint-6500 → checkpoint-6900}/trainer_state.json
RENAMED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 2.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1590,11 +1590,107 @@
|
|
1590 |
"learning_rate": 1.2382654384049475e-05,
|
1591 |
"loss": 0.7714,
|
1592 |
"step": 6500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1593 |
}
|
1594 |
],
|
1595 |
"max_steps": 7737,
|
1596 |
"num_train_epochs": 3,
|
1597 |
-
"total_flos": 2.
|
1598 |
"trial_name": null,
|
1599 |
"trial_params": null
|
1600 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.6754556029468786,
|
5 |
+
"global_step": 6900,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1590 |
"learning_rate": 1.2382654384049475e-05,
|
1591 |
"loss": 0.7714,
|
1592 |
"step": 6500
|
1593 |
+
},
|
1594 |
+
{
|
1595 |
+
"epoch": 2.53,
|
1596 |
+
"learning_rate": 1.1897272010289884e-05,
|
1597 |
+
"loss": 0.7701,
|
1598 |
+
"step": 6525
|
1599 |
+
},
|
1600 |
+
{
|
1601 |
+
"epoch": 2.54,
|
1602 |
+
"learning_rate": 1.1420991777454315e-05,
|
1603 |
+
"loss": 0.7628,
|
1604 |
+
"step": 6550
|
1605 |
+
},
|
1606 |
+
{
|
1607 |
+
"epoch": 2.55,
|
1608 |
+
"learning_rate": 1.0953862891394795e-05,
|
1609 |
+
"loss": 0.7661,
|
1610 |
+
"step": 6575
|
1611 |
+
},
|
1612 |
+
{
|
1613 |
+
"epoch": 2.56,
|
1614 |
+
"learning_rate": 1.0495933612511976e-05,
|
1615 |
+
"loss": 0.7729,
|
1616 |
+
"step": 6600
|
1617 |
+
},
|
1618 |
+
{
|
1619 |
+
"epoch": 2.57,
|
1620 |
+
"learning_rate": 1.0047251250769175e-05,
|
1621 |
+
"loss": 0.772,
|
1622 |
+
"step": 6625
|
1623 |
+
},
|
1624 |
+
{
|
1625 |
+
"epoch": 2.58,
|
1626 |
+
"learning_rate": 9.60786216080466e-06,
|
1627 |
+
"loss": 0.7702,
|
1628 |
+
"step": 6650
|
1629 |
+
},
|
1630 |
+
{
|
1631 |
+
"epoch": 2.59,
|
1632 |
+
"learning_rate": 9.177811737142627e-06,
|
1633 |
+
"loss": 0.7711,
|
1634 |
+
"step": 6675
|
1635 |
+
},
|
1636 |
+
{
|
1637 |
+
"epoch": 2.6,
|
1638 |
+
"learning_rate": 8.757144409503359e-06,
|
1639 |
+
"loss": 0.765,
|
1640 |
+
"step": 6700
|
1641 |
+
},
|
1642 |
+
{
|
1643 |
+
"epoch": 2.61,
|
1644 |
+
"learning_rate": 8.34590363821306e-06,
|
1645 |
+
"loss": 0.7713,
|
1646 |
+
"step": 6725
|
1647 |
+
},
|
1648 |
+
{
|
1649 |
+
"epoch": 2.62,
|
1650 |
+
"learning_rate": 7.944131909713859e-06,
|
1651 |
+
"loss": 0.7631,
|
1652 |
+
"step": 6750
|
1653 |
+
},
|
1654 |
+
{
|
1655 |
+
"epoch": 2.63,
|
1656 |
+
"learning_rate": 7.551870732174416e-06,
|
1657 |
+
"loss": 0.767,
|
1658 |
+
"step": 6775
|
1659 |
+
},
|
1660 |
+
{
|
1661 |
+
"epoch": 2.64,
|
1662 |
+
"learning_rate": 7.169160631201566e-06,
|
1663 |
+
"loss": 0.7692,
|
1664 |
+
"step": 6800
|
1665 |
+
},
|
1666 |
+
{
|
1667 |
+
"epoch": 2.65,
|
1668 |
+
"learning_rate": 6.796041145653553e-06,
|
1669 |
+
"loss": 0.7677,
|
1670 |
+
"step": 6825
|
1671 |
+
},
|
1672 |
+
{
|
1673 |
+
"epoch": 2.66,
|
1674 |
+
"learning_rate": 6.432550823555128e-06,
|
1675 |
+
"loss": 0.7706,
|
1676 |
+
"step": 6850
|
1677 |
+
},
|
1678 |
+
{
|
1679 |
+
"epoch": 2.67,
|
1680 |
+
"learning_rate": 6.078727218115043e-06,
|
1681 |
+
"loss": 0.7678,
|
1682 |
+
"step": 6875
|
1683 |
+
},
|
1684 |
+
{
|
1685 |
+
"epoch": 2.68,
|
1686 |
+
"learning_rate": 5.734606883846338e-06,
|
1687 |
+
"loss": 0.7717,
|
1688 |
+
"step": 6900
|
1689 |
}
|
1690 |
],
|
1691 |
"max_steps": 7737,
|
1692 |
"num_train_epochs": 3,
|
1693 |
+
"total_flos": 2.9715299616168083e+19,
|
1694 |
"trial_name": null,
|
1695 |
"trial_params": null
|
1696 |
}
|
{checkpoint-6500 → checkpoint-6900}/training_args.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4027
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c2f4cc723c65538de12d445f71a30b1610b702bfe771edb2385636bb8724bfd
|
3 |
size 4027
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4027
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c2f4cc723c65538de12d445f71a30b1610b702bfe771edb2385636bb8724bfd
|
3 |
size 4027
|