ZeroUniqueness committed on
Commit
79fef62
•
1 Parent(s): ffa4692

Training in progress, step 6900

Browse files
Files changed (27) hide show
  1. adapter_config.json +4 -4
  2. adapter_model.bin +1 -1
  3. {checkpoint-6500 → checkpoint-6800/adapter_model}/README.md +0 -0
  4. {checkpoint-6500 → checkpoint-6800/adapter_model}/adapter_config.json +0 -0
  5. {checkpoint-6500 → checkpoint-6800/adapter_model}/adapter_model.bin +1 -1
  6. {checkpoint-6500/adapter_model → checkpoint-6900}/README.md +0 -0
  7. {checkpoint-6500/adapter_model → checkpoint-6900}/adapter_config.json +4 -4
  8. {checkpoint-6500/adapter_model → checkpoint-6900}/adapter_model.bin +1 -1
  9. {checkpoint-6500 → checkpoint-6900}/optimizer.pt +1 -1
  10. {checkpoint-6500 → checkpoint-6900}/rng_state_0.pth +1 -1
  11. {checkpoint-6500 → checkpoint-6900}/rng_state_1.pth +1 -1
  12. {checkpoint-6500 → checkpoint-6900}/rng_state_10.pth +1 -1
  13. {checkpoint-6500 → checkpoint-6900}/rng_state_11.pth +1 -1
  14. {checkpoint-6500 → checkpoint-6900}/rng_state_12.pth +1 -1
  15. {checkpoint-6500 → checkpoint-6900}/rng_state_13.pth +1 -1
  16. {checkpoint-6500 → checkpoint-6900}/rng_state_2.pth +1 -1
  17. {checkpoint-6500 → checkpoint-6900}/rng_state_3.pth +1 -1
  18. {checkpoint-6500 → checkpoint-6900}/rng_state_4.pth +1 -1
  19. {checkpoint-6500 → checkpoint-6900}/rng_state_5.pth +1 -1
  20. {checkpoint-6500 → checkpoint-6900}/rng_state_6.pth +1 -1
  21. {checkpoint-6500 → checkpoint-6900}/rng_state_7.pth +1 -1
  22. {checkpoint-6500 → checkpoint-6900}/rng_state_8.pth +1 -1
  23. {checkpoint-6500 → checkpoint-6900}/rng_state_9.pth +1 -1
  24. {checkpoint-6500 → checkpoint-6900}/scheduler.pt +1 -1
  25. {checkpoint-6500 → checkpoint-6900}/trainer_state.json +99 -3
  26. {checkpoint-6500 → checkpoint-6900}/training_args.bin +1 -1
  27. training_args.bin +1 -1
adapter_config.json CHANGED
@@ -14,13 +14,13 @@
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
17
- "up_proj",
18
- "gate_proj",
19
  "down_proj",
 
20
  "q_proj",
21
  "k_proj",
22
- "o_proj",
23
- "v_proj"
 
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
 
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
 
 
17
  "down_proj",
18
+ "up_proj",
19
  "q_proj",
20
  "k_proj",
21
+ "gate_proj",
22
+ "v_proj",
23
+ "o_proj"
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:caee01e1da9f7e92223499414f8bb25b28ea34daf6fb927c9bd869dcba2559fd
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dce28954f69ce7f466b50fc774c94fb6da869b76f94baf208c3e276b1365db2
3
  size 500897101
{checkpoint-6500 → checkpoint-6800/adapter_model}/README.md RENAMED
File without changes
{checkpoint-6500 → checkpoint-6800/adapter_model}/adapter_config.json RENAMED
File without changes
{checkpoint-6500 → checkpoint-6800/adapter_model}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eca317a26dab233b92de0d6d401d39cb8b1fe17ca51e4a191e73b88cb76bfc3d
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:caee01e1da9f7e92223499414f8bb25b28ea34daf6fb927c9bd869dcba2559fd
3
  size 500897101
{checkpoint-6500/adapter_model → checkpoint-6900}/README.md RENAMED
File without changes
{checkpoint-6500/adapter_model → checkpoint-6900}/adapter_config.json RENAMED
@@ -14,13 +14,13 @@
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
17
- "up_proj",
18
- "gate_proj",
19
  "down_proj",
 
20
  "q_proj",
21
  "k_proj",
22
- "o_proj",
23
- "v_proj"
 
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
 
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
 
 
17
  "down_proj",
18
+ "up_proj",
19
  "q_proj",
20
  "k_proj",
21
+ "gate_proj",
22
+ "v_proj",
23
+ "o_proj"
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
{checkpoint-6500/adapter_model → checkpoint-6900}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eca317a26dab233b92de0d6d401d39cb8b1fe17ca51e4a191e73b88cb76bfc3d
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dce28954f69ce7f466b50fc774c94fb6da869b76f94baf208c3e276b1365db2
3
  size 500897101
{checkpoint-6500 → checkpoint-6900}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9682eef3ae7298917bb748df54df8acc39f0813218a1379d7f07289466ceb1f
3
  size 1001752701
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e51694cb0a1cd9603ece0232dabcd7a370a6f8e378f0b01213ceaaf413c4d0c
3
  size 1001752701
{checkpoint-6500 → checkpoint-6900}/rng_state_0.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbf0d11080193db9fc57ef80bb7e4db0c2dfdb81c6c7d949934ecea2b60c19bf
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:361a0cbcd4b75d795f0109f290c642aca880cc06772f00227ba1e0217fc939ef
3
  size 27772
{checkpoint-6500 → checkpoint-6900}/rng_state_1.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55ed8dac28e473953d59209a3d2e84f582e221e1aeec01bb889b689dd5d44e2d
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f5123ae610aa26457a3fa34fd70ab7e4ab44cb21dc09fd095aad55a2d15294a
3
  size 27772
{checkpoint-6500 → checkpoint-6900}/rng_state_10.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccf2c7c92e70421594d6f3f46563c37fcacbab1b490cdc37991945fbefd0f763
3
  size 27789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a213c64ebe2689a3f02bbe5b0ee2c66feb57c0f5f4f8621c0ec2f63c6c590892
3
  size 27789
{checkpoint-6500 → checkpoint-6900}/rng_state_11.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b57753b4305e0903d863e858d017bc2028a3f7434a2c1e604d81db6a5380681d
3
  size 27789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:531d6f0912536de9c8ce68b83c17fc005458796d7921c1fe083548fda04d57a7
3
  size 27789
{checkpoint-6500 → checkpoint-6900}/rng_state_12.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e3a73c535e411cb5175cf8662e504f0b075dc27417beaf98fb6764a53d08c703
3
  size 27789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dad9e94de76a75ad267b7650de4df602944013ed9b553b148fa94de6ea86bb52
3
  size 27789
{checkpoint-6500 → checkpoint-6900}/rng_state_13.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c48a0a908e8938ff0202d305b73a93eef0a0f00a8215a6dc22758d9da73ca3d
3
  size 27789
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f9e0a77144d5a662958f707fc8b494554054fd279f5a3dc74159dc97e6d1e78
3
  size 27789
{checkpoint-6500 → checkpoint-6900}/rng_state_2.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25834dbb3472e6bffcb9ff8e2e1078b3d7285d899c32d530801c909e58792ca0
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5b931b3c81ed25f1fcd0971b6da08a3869bfd28a0489528ed3c96c3619fa557
3
  size 27772
{checkpoint-6500 → checkpoint-6900}/rng_state_3.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f2243c59fbd105663e2bc1078c5d294f026256157124a120dd6fc96481dde2a
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e121c889141260915892ddfe435284a35a9f246ef67720bdc33081a846ab328
3
  size 27772
{checkpoint-6500 → checkpoint-6900}/rng_state_4.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80e34c823c057e9fa9761cbcb6a98def6aedfc20291fc6e93777c2fd857362da
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f912038cfe5be27104676eb3640c3beb52c1c4c80bafea1732f0443a67ad4b4
3
  size 27772
{checkpoint-6500 → checkpoint-6900}/rng_state_5.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49a370bb0d4117c8cf565138b86c7c35fdd0f6916cd6a676aff48559b5050a97
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e7e05a90589de96c2e2748fa928890eb3ae0f6585d2b9768316f612aa8cf908
3
  size 27772
{checkpoint-6500 → checkpoint-6900}/rng_state_6.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a142e199df177be102679c7c5299cd0ca74e71330003c3b0c22bf166a55fd738
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfaec3c28cf52d29cdacae2acf43363d9aea5a881877b06a0055a14c1b7aed6a
3
  size 27772
{checkpoint-6500 → checkpoint-6900}/rng_state_7.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fdd64c508edb14ee20f09cd5f53a1d6ae1b6ac188a5ce19c88b43bb1c51e651c
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c35c98f247077b0ff5a006aeba6561c7e6bf2e9fd8969c9bc35d64aaf01cf14a
3
  size 27772
{checkpoint-6500 → checkpoint-6900}/rng_state_8.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:117885c5fa979f628afc9a7003d4946141b0153b62ce3ffa6875857ffdd78ae8
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86a908ac53361e9d5c34680f8e1789e69824f75a3e0b100140c8133e52f8fbed
3
  size 27772
{checkpoint-6500 → checkpoint-6900}/rng_state_9.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9794ac48303e5da2f12696c803580c6cec30009df7c1a5151886c1bb7932c058
3
  size 27772
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5597a151c26d0f9f0ba329a3cf67af7f95b7fed3d58ea4461f586948ea11f890
3
  size 27772
{checkpoint-6500 → checkpoint-6900}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c2bdf46b70bc586fca89e010bc74dca67e64afb8ffe6379ceecc7b272fb7824
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed903a2193f395f38212602a53ab2a5c251266f0ae7d7dc0557e3b565ed8e240
3
  size 627
{checkpoint-6500 → checkpoint-6900}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.520356727413726,
5
- "global_step": 6500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1590,11 +1590,107 @@
1590
  "learning_rate": 1.2382654384049475e-05,
1591
  "loss": 0.7714,
1592
  "step": 6500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1593
  }
1594
  ],
1595
  "max_steps": 7737,
1596
  "num_train_epochs": 3,
1597
- "total_flos": 2.7994166247676707e+19,
1598
  "trial_name": null,
1599
  "trial_params": null
1600
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.6754556029468786,
5
+ "global_step": 6900,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1590
  "learning_rate": 1.2382654384049475e-05,
1591
  "loss": 0.7714,
1592
  "step": 6500
1593
+ },
1594
+ {
1595
+ "epoch": 2.53,
1596
+ "learning_rate": 1.1897272010289884e-05,
1597
+ "loss": 0.7701,
1598
+ "step": 6525
1599
+ },
1600
+ {
1601
+ "epoch": 2.54,
1602
+ "learning_rate": 1.1420991777454315e-05,
1603
+ "loss": 0.7628,
1604
+ "step": 6550
1605
+ },
1606
+ {
1607
+ "epoch": 2.55,
1608
+ "learning_rate": 1.0953862891394795e-05,
1609
+ "loss": 0.7661,
1610
+ "step": 6575
1611
+ },
1612
+ {
1613
+ "epoch": 2.56,
1614
+ "learning_rate": 1.0495933612511976e-05,
1615
+ "loss": 0.7729,
1616
+ "step": 6600
1617
+ },
1618
+ {
1619
+ "epoch": 2.57,
1620
+ "learning_rate": 1.0047251250769175e-05,
1621
+ "loss": 0.772,
1622
+ "step": 6625
1623
+ },
1624
+ {
1625
+ "epoch": 2.58,
1626
+ "learning_rate": 9.60786216080466e-06,
1627
+ "loss": 0.7702,
1628
+ "step": 6650
1629
+ },
1630
+ {
1631
+ "epoch": 2.59,
1632
+ "learning_rate": 9.177811737142627e-06,
1633
+ "loss": 0.7711,
1634
+ "step": 6675
1635
+ },
1636
+ {
1637
+ "epoch": 2.6,
1638
+ "learning_rate": 8.757144409503359e-06,
1639
+ "loss": 0.765,
1640
+ "step": 6700
1641
+ },
1642
+ {
1643
+ "epoch": 2.61,
1644
+ "learning_rate": 8.34590363821306e-06,
1645
+ "loss": 0.7713,
1646
+ "step": 6725
1647
+ },
1648
+ {
1649
+ "epoch": 2.62,
1650
+ "learning_rate": 7.944131909713859e-06,
1651
+ "loss": 0.7631,
1652
+ "step": 6750
1653
+ },
1654
+ {
1655
+ "epoch": 2.63,
1656
+ "learning_rate": 7.551870732174416e-06,
1657
+ "loss": 0.767,
1658
+ "step": 6775
1659
+ },
1660
+ {
1661
+ "epoch": 2.64,
1662
+ "learning_rate": 7.169160631201566e-06,
1663
+ "loss": 0.7692,
1664
+ "step": 6800
1665
+ },
1666
+ {
1667
+ "epoch": 2.65,
1668
+ "learning_rate": 6.796041145653553e-06,
1669
+ "loss": 0.7677,
1670
+ "step": 6825
1671
+ },
1672
+ {
1673
+ "epoch": 2.66,
1674
+ "learning_rate": 6.432550823555128e-06,
1675
+ "loss": 0.7706,
1676
+ "step": 6850
1677
+ },
1678
+ {
1679
+ "epoch": 2.67,
1680
+ "learning_rate": 6.078727218115043e-06,
1681
+ "loss": 0.7678,
1682
+ "step": 6875
1683
+ },
1684
+ {
1685
+ "epoch": 2.68,
1686
+ "learning_rate": 5.734606883846338e-06,
1687
+ "loss": 0.7717,
1688
+ "step": 6900
1689
  }
1690
  ],
1691
  "max_steps": 7737,
1692
  "num_train_epochs": 3,
1693
+ "total_flos": 2.9715299616168083e+19,
1694
  "trial_name": null,
1695
  "trial_params": null
1696
  }
{checkpoint-6500 → checkpoint-6900}/training_args.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e85009bca9623c846e630c294adb80ecbcd9e720da8da9f9ee5311b562908b91
3
  size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c2f4cc723c65538de12d445f71a30b1610b702bfe771edb2385636bb8724bfd
3
  size 4027
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e85009bca9623c846e630c294adb80ecbcd9e720da8da9f9ee5311b562908b91
3
  size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c2f4cc723c65538de12d445f71a30b1610b702bfe771edb2385636bb8724bfd
3
  size 4027