ZeroUniqueness committed on
Commit
c01f689
·
1 Parent(s): 1f8766e

Training in progress, step 28000

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:738e63d241a0a1440d6120083eea9c47e57d65e6d8cb770ded25c0d8154e9153
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:196c1d0349fe865fe652da8fb5f0318dc7fea1691a4f6b9ce949ba1d8fc568d2
3
  size 500897101
{checkpoint-24000 → checkpoint-27000/adapter_model}/README.md RENAMED
File without changes
{checkpoint-24000 → checkpoint-27000/adapter_model}/adapter_config.json RENAMED
File without changes
{checkpoint-24000 → checkpoint-27000/adapter_model}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e5f593687f6b7f35b8db5be08cd3d3eabc5b69e4a5f70590ddb55cde4f73c64
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:738e63d241a0a1440d6120083eea9c47e57d65e6d8cb770ded25c0d8154e9153
3
  size 500897101
{checkpoint-24000/adapter_model → checkpoint-28000}/README.md RENAMED
File without changes
{checkpoint-24000/adapter_model → checkpoint-28000}/adapter_config.json RENAMED
File without changes
{checkpoint-24000/adapter_model → checkpoint-28000}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e5f593687f6b7f35b8db5be08cd3d3eabc5b69e4a5f70590ddb55cde4f73c64
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:196c1d0349fe865fe652da8fb5f0318dc7fea1691a4f6b9ce949ba1d8fc568d2
3
  size 500897101
{checkpoint-24000 → checkpoint-28000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41c516a1cda2f637391a4cc14345088f2c9231c0a65bc15aa25db213427b391e
3
  size 1001723453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04d78b2c094992c84dfe56222bca52d649375e78ba1b620b3066065ac043d46c
3
  size 1001723453
{checkpoint-24000 → checkpoint-28000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6a0c20bcdf319a5ffb4a98f7ca699fd1efef2927f5c6f105ca13297f084ba02
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4463358c0b4ae7957b6ac92cc6e7a3966deed18d8592e7b96d0b5329e30e9c3
3
  size 14575
{checkpoint-24000 → checkpoint-28000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd7a0be07ab26faaad77ee68b49800320fcb18c774f44c984bf819516ecfa521
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f454f0dd53d23ae46c76eed24d3b8210f5bc3588acc88def4db4574c3744dffb
3
  size 627
{checkpoint-24000 → checkpoint-28000}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.6122664213180542,
3
- "best_model_checkpoint": "./qlora-out/checkpoint-24000",
4
- "epoch": 0.8948212221766526,
5
- "global_step": 24000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1638,11 +1638,283 @@
1638
  "eval_samples_per_second": 0.432,
1639
  "eval_steps_per_second": 0.432,
1640
  "step": 24000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1641
  }
1642
  ],
1643
  "max_steps": 80463,
1644
  "num_train_epochs": 3,
1645
- "total_flos": 6.727787581059072e+18,
1646
  "trial_name": null,
1647
  "trial_params": null
1648
  }
 
1
  {
2
+ "best_metric": 0.5959522128105164,
3
+ "best_model_checkpoint": "./qlora-out/checkpoint-28000",
4
+ "epoch": 1.043958092539428,
5
+ "global_step": 28000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1638
  "eval_samples_per_second": 0.432,
1639
  "eval_steps_per_second": 0.432,
1640
  "step": 24000
1641
+ },
1642
+ {
1643
+ "epoch": 0.9,
1644
+ "learning_rate": 0.00015892343406377225,
1645
+ "loss": 0.5943,
1646
+ "step": 24100
1647
+ },
1648
+ {
1649
+ "epoch": 0.9,
1650
+ "learning_rate": 0.00015860748613933455,
1651
+ "loss": 0.6008,
1652
+ "step": 24200
1653
+ },
1654
+ {
1655
+ "epoch": 0.91,
1656
+ "learning_rate": 0.00015829064456419477,
1657
+ "loss": 0.6123,
1658
+ "step": 24300
1659
+ },
1660
+ {
1661
+ "epoch": 0.91,
1662
+ "learning_rate": 0.00015797291416957355,
1663
+ "loss": 0.5819,
1664
+ "step": 24400
1665
+ },
1666
+ {
1667
+ "epoch": 0.91,
1668
+ "learning_rate": 0.00015765429980024425,
1669
+ "loss": 0.5731,
1670
+ "step": 24500
1671
+ },
1672
+ {
1673
+ "epoch": 0.92,
1674
+ "learning_rate": 0.00015733480631445926,
1675
+ "loss": 0.593,
1676
+ "step": 24600
1677
+ },
1678
+ {
1679
+ "epoch": 0.92,
1680
+ "learning_rate": 0.00015701443858387562,
1681
+ "loss": 0.5764,
1682
+ "step": 24700
1683
+ },
1684
+ {
1685
+ "epoch": 0.92,
1686
+ "learning_rate": 0.00015669320149348104,
1687
+ "loss": 0.6037,
1688
+ "step": 24800
1689
+ },
1690
+ {
1691
+ "epoch": 0.93,
1692
+ "learning_rate": 0.0001563710999415193,
1693
+ "loss": 0.5958,
1694
+ "step": 24900
1695
+ },
1696
+ {
1697
+ "epoch": 0.93,
1698
+ "learning_rate": 0.00015604813883941535,
1699
+ "loss": 0.6186,
1700
+ "step": 25000
1701
+ },
1702
+ {
1703
+ "epoch": 0.93,
1704
+ "eval_loss": 0.6086174249649048,
1705
+ "eval_runtime": 1260.3923,
1706
+ "eval_samples_per_second": 0.43,
1707
+ "eval_steps_per_second": 0.43,
1708
+ "step": 25000
1709
+ },
1710
+ {
1711
+ "epoch": 0.94,
1712
+ "learning_rate": 0.00015572432311170096,
1713
+ "loss": 0.597,
1714
+ "step": 25100
1715
+ },
1716
+ {
1717
+ "epoch": 0.94,
1718
+ "learning_rate": 0.00015539965769593894,
1719
+ "loss": 0.5657,
1720
+ "step": 25200
1721
+ },
1722
+ {
1723
+ "epoch": 0.94,
1724
+ "learning_rate": 0.0001550741475426484,
1725
+ "loss": 0.6081,
1726
+ "step": 25300
1727
+ },
1728
+ {
1729
+ "epoch": 0.95,
1730
+ "learning_rate": 0.00015474779761522894,
1731
+ "loss": 0.5957,
1732
+ "step": 25400
1733
+ },
1734
+ {
1735
+ "epoch": 0.95,
1736
+ "learning_rate": 0.00015442061288988525,
1737
+ "loss": 0.6032,
1738
+ "step": 25500
1739
+ },
1740
+ {
1741
+ "epoch": 0.95,
1742
+ "learning_rate": 0.00015409259835555089,
1743
+ "loss": 0.5662,
1744
+ "step": 25600
1745
+ },
1746
+ {
1747
+ "epoch": 0.96,
1748
+ "learning_rate": 0.00015376375901381256,
1749
+ "loss": 0.5607,
1750
+ "step": 25700
1751
+ },
1752
+ {
1753
+ "epoch": 0.96,
1754
+ "learning_rate": 0.00015343409987883354,
1755
+ "loss": 0.5727,
1756
+ "step": 25800
1757
+ },
1758
+ {
1759
+ "epoch": 0.97,
1760
+ "learning_rate": 0.00015310362597727747,
1761
+ "loss": 0.5762,
1762
+ "step": 25900
1763
+ },
1764
+ {
1765
+ "epoch": 0.97,
1766
+ "learning_rate": 0.00015277234234823154,
1767
+ "loss": 0.5841,
1768
+ "step": 26000
1769
+ },
1770
+ {
1771
+ "epoch": 0.97,
1772
+ "eval_loss": 0.6026987433433533,
1773
+ "eval_runtime": 1292.1515,
1774
+ "eval_samples_per_second": 0.419,
1775
+ "eval_steps_per_second": 0.419,
1776
+ "step": 26000
1777
+ },
1778
+ {
1779
+ "epoch": 0.97,
1780
+ "learning_rate": 0.00015244025404312974,
1781
+ "loss": 0.6015,
1782
+ "step": 26100
1783
+ },
1784
+ {
1785
+ "epoch": 0.98,
1786
+ "learning_rate": 0.00015210736612567588,
1787
+ "loss": 0.5914,
1788
+ "step": 26200
1789
+ },
1790
+ {
1791
+ "epoch": 0.98,
1792
+ "learning_rate": 0.00015177368367176616,
1793
+ "loss": 0.5799,
1794
+ "step": 26300
1795
+ },
1796
+ {
1797
+ "epoch": 0.98,
1798
+ "learning_rate": 0.00015143921176941205,
1799
+ "loss": 0.6037,
1800
+ "step": 26400
1801
+ },
1802
+ {
1803
+ "epoch": 0.99,
1804
+ "learning_rate": 0.00015110395551866255,
1805
+ "loss": 0.5876,
1806
+ "step": 26500
1807
+ },
1808
+ {
1809
+ "epoch": 0.99,
1810
+ "learning_rate": 0.0001507679200315264,
1811
+ "loss": 0.5973,
1812
+ "step": 26600
1813
+ },
1814
+ {
1815
+ "epoch": 1.0,
1816
+ "learning_rate": 0.00015043111043189423,
1817
+ "loss": 0.5957,
1818
+ "step": 26700
1819
+ },
1820
+ {
1821
+ "epoch": 1.0,
1822
+ "learning_rate": 0.00015009353185546046,
1823
+ "loss": 0.5696,
1824
+ "step": 26800
1825
+ },
1826
+ {
1827
+ "epoch": 1.0,
1828
+ "learning_rate": 0.00014975518944964478,
1829
+ "loss": 0.5523,
1830
+ "step": 26900
1831
+ },
1832
+ {
1833
+ "epoch": 1.01,
1834
+ "learning_rate": 0.0001494160883735139,
1835
+ "loss": 0.5144,
1836
+ "step": 27000
1837
+ },
1838
+ {
1839
+ "epoch": 1.01,
1840
+ "eval_loss": 0.5985096096992493,
1841
+ "eval_runtime": 1314.8131,
1842
+ "eval_samples_per_second": 0.412,
1843
+ "eval_steps_per_second": 0.412,
1844
+ "step": 27000
1845
+ },
1846
+ {
1847
+ "epoch": 1.01,
1848
+ "learning_rate": 0.00014907623379770263,
1849
+ "loss": 0.5743,
1850
+ "step": 27100
1851
+ },
1852
+ {
1853
+ "epoch": 1.01,
1854
+ "learning_rate": 0.00014873563090433547,
1855
+ "loss": 0.5095,
1856
+ "step": 27200
1857
+ },
1858
+ {
1859
+ "epoch": 1.02,
1860
+ "learning_rate": 0.00014839428488694706,
1861
+ "loss": 0.5391,
1862
+ "step": 27300
1863
+ },
1864
+ {
1865
+ "epoch": 1.02,
1866
+ "learning_rate": 0.00014805220095040334,
1867
+ "loss": 0.5532,
1868
+ "step": 27400
1869
+ },
1870
+ {
1871
+ "epoch": 1.03,
1872
+ "learning_rate": 0.00014770938431082212,
1873
+ "loss": 0.536,
1874
+ "step": 27500
1875
+ },
1876
+ {
1877
+ "epoch": 1.03,
1878
+ "learning_rate": 0.00014736584019549342,
1879
+ "loss": 0.5204,
1880
+ "step": 27600
1881
+ },
1882
+ {
1883
+ "epoch": 1.03,
1884
+ "learning_rate": 0.00014702157384279997,
1885
+ "loss": 0.5026,
1886
+ "step": 27700
1887
+ },
1888
+ {
1889
+ "epoch": 1.04,
1890
+ "learning_rate": 0.0001466765905021371,
1891
+ "loss": 0.5319,
1892
+ "step": 27800
1893
+ },
1894
+ {
1895
+ "epoch": 1.04,
1896
+ "learning_rate": 0.00014633089543383295,
1897
+ "loss": 0.5112,
1898
+ "step": 27900
1899
+ },
1900
+ {
1901
+ "epoch": 1.04,
1902
+ "learning_rate": 0.00014598449390906804,
1903
+ "loss": 0.5146,
1904
+ "step": 28000
1905
+ },
1906
+ {
1907
+ "epoch": 1.04,
1908
+ "eval_loss": 0.5959522128105164,
1909
+ "eval_runtime": 1288.6066,
1910
+ "eval_samples_per_second": 0.421,
1911
+ "eval_steps_per_second": 0.421,
1912
+ "step": 28000
1913
  }
1914
  ],
1915
  "max_steps": 80463,
1916
  "num_train_epochs": 3,
1917
+ "total_flos": 7.852151375567094e+18,
1918
  "trial_name": null,
1919
  "trial_params": null
1920
  }
{checkpoint-24000 → checkpoint-28000}/training_args.bin RENAMED
File without changes