ZeroUniqueness committed on
Commit
d752ca3
‒
1 Parent(s): 1151ad0

Training in progress, step 30000

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bfba69a38235688144762f0028225bf75ceb8ed54f934556abed7f29749c0a1c
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9537b3a7e6055deff50c06679b380e3a915c90d647696e0b31fc0265c367cbbc
3
  size 500897101
{checkpoint-26000 → checkpoint-29000/adapter_model}/README.md RENAMED
File without changes
{checkpoint-26000 → checkpoint-29000/adapter_model}/adapter_config.json RENAMED
File without changes
{checkpoint-26000 → checkpoint-29000/adapter_model}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db604a690ace9df3e57323e7728a62d95ca6fe945a7a4f0390b60803c2b0d0f0
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfba69a38235688144762f0028225bf75ceb8ed54f934556abed7f29749c0a1c
3
  size 500897101
{checkpoint-26000/adapter_model → checkpoint-30000}/README.md RENAMED
File without changes
{checkpoint-26000/adapter_model → checkpoint-30000}/adapter_config.json RENAMED
File without changes
{checkpoint-26000/adapter_model → checkpoint-30000}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db604a690ace9df3e57323e7728a62d95ca6fe945a7a4f0390b60803c2b0d0f0
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9537b3a7e6055deff50c06679b380e3a915c90d647696e0b31fc0265c367cbbc
3
  size 500897101
{checkpoint-26000 → checkpoint-30000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27e0a8d1f02955048c81208b54b8e952c63d97fda72aa75ae96fcfc8f35af7cb
3
  size 1001723453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be1c4c976ca70fca137b1074a48111db76f8417f793dd8b169c29e76d799f173
3
  size 1001723453
{checkpoint-26000 → checkpoint-30000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23c71a7c504ad8a21f65a57923dde0238456dda222b9af4b24d2bfc5f74e422c
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5aed0374c0ea79a6b6022726183fe49008353437a1d89b31492e7e657ff784f
3
  size 14575
{checkpoint-26000 → checkpoint-30000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a9f0595f0fbccf1fcdc4c120788726227aa9e6442e4233fc3c71485b74b1e4e
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3860c8699ca30c96bb0a9da34e9e209f4fa8a3c56eb8f9c2ce426750974e9aa6
3
  size 627
{checkpoint-26000 → checkpoint-30000}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.6026987433433533,
3
- "best_model_checkpoint": "./qlora-out/checkpoint-26000",
4
- "epoch": 0.9693896573580404,
5
- "global_step": 26000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1774,11 +1774,283 @@
1774
  "eval_samples_per_second": 0.419,
1775
  "eval_steps_per_second": 0.419,
1776
  "step": 26000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1777
  }
1778
  ],
1779
  "max_steps": 80463,
1780
  "num_train_epochs": 3,
1781
- "total_flos": 7.289703933515858e+18,
1782
  "trial_name": null,
1783
  "trial_params": null
1784
  }
 
1
  {
2
+ "best_metric": 0.5869857668876648,
3
+ "best_model_checkpoint": "./qlora-out/checkpoint-30000",
4
+ "epoch": 1.1185265277208158,
5
+ "global_step": 30000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1774
  "eval_samples_per_second": 0.419,
1775
  "eval_steps_per_second": 0.419,
1776
  "step": 26000
1777
+ },
1778
+ {
1779
+ "epoch": 0.97,
1780
+ "learning_rate": 0.00015244025404312974,
1781
+ "loss": 0.6015,
1782
+ "step": 26100
1783
+ },
1784
+ {
1785
+ "epoch": 0.98,
1786
+ "learning_rate": 0.00015210736612567588,
1787
+ "loss": 0.5914,
1788
+ "step": 26200
1789
+ },
1790
+ {
1791
+ "epoch": 0.98,
1792
+ "learning_rate": 0.00015177368367176616,
1793
+ "loss": 0.5799,
1794
+ "step": 26300
1795
+ },
1796
+ {
1797
+ "epoch": 0.98,
1798
+ "learning_rate": 0.00015143921176941205,
1799
+ "loss": 0.6037,
1800
+ "step": 26400
1801
+ },
1802
+ {
1803
+ "epoch": 0.99,
1804
+ "learning_rate": 0.00015110395551866255,
1805
+ "loss": 0.5876,
1806
+ "step": 26500
1807
+ },
1808
+ {
1809
+ "epoch": 0.99,
1810
+ "learning_rate": 0.0001507679200315264,
1811
+ "loss": 0.5973,
1812
+ "step": 26600
1813
+ },
1814
+ {
1815
+ "epoch": 1.0,
1816
+ "learning_rate": 0.00015043111043189423,
1817
+ "loss": 0.5957,
1818
+ "step": 26700
1819
+ },
1820
+ {
1821
+ "epoch": 1.0,
1822
+ "learning_rate": 0.00015009353185546046,
1823
+ "loss": 0.5696,
1824
+ "step": 26800
1825
+ },
1826
+ {
1827
+ "epoch": 1.0,
1828
+ "learning_rate": 0.00014975518944964478,
1829
+ "loss": 0.5523,
1830
+ "step": 26900
1831
+ },
1832
+ {
1833
+ "epoch": 1.01,
1834
+ "learning_rate": 0.0001494160883735139,
1835
+ "loss": 0.5144,
1836
+ "step": 27000
1837
+ },
1838
+ {
1839
+ "epoch": 1.01,
1840
+ "eval_loss": 0.5985096096992493,
1841
+ "eval_runtime": 1314.8131,
1842
+ "eval_samples_per_second": 0.412,
1843
+ "eval_steps_per_second": 0.412,
1844
+ "step": 27000
1845
+ },
1846
+ {
1847
+ "epoch": 1.01,
1848
+ "learning_rate": 0.00014907623379770263,
1849
+ "loss": 0.5743,
1850
+ "step": 27100
1851
+ },
1852
+ {
1853
+ "epoch": 1.01,
1854
+ "learning_rate": 0.00014873563090433547,
1855
+ "loss": 0.5095,
1856
+ "step": 27200
1857
+ },
1858
+ {
1859
+ "epoch": 1.02,
1860
+ "learning_rate": 0.00014839428488694706,
1861
+ "loss": 0.5391,
1862
+ "step": 27300
1863
+ },
1864
+ {
1865
+ "epoch": 1.02,
1866
+ "learning_rate": 0.00014805220095040334,
1867
+ "loss": 0.5532,
1868
+ "step": 27400
1869
+ },
1870
+ {
1871
+ "epoch": 1.03,
1872
+ "learning_rate": 0.00014770938431082212,
1873
+ "loss": 0.536,
1874
+ "step": 27500
1875
+ },
1876
+ {
1877
+ "epoch": 1.03,
1878
+ "learning_rate": 0.00014736584019549342,
1879
+ "loss": 0.5204,
1880
+ "step": 27600
1881
+ },
1882
+ {
1883
+ "epoch": 1.03,
1884
+ "learning_rate": 0.00014702157384279997,
1885
+ "loss": 0.5026,
1886
+ "step": 27700
1887
+ },
1888
+ {
1889
+ "epoch": 1.04,
1890
+ "learning_rate": 0.0001466765905021371,
1891
+ "loss": 0.5319,
1892
+ "step": 27800
1893
+ },
1894
+ {
1895
+ "epoch": 1.04,
1896
+ "learning_rate": 0.00014633089543383295,
1897
+ "loss": 0.5112,
1898
+ "step": 27900
1899
+ },
1900
+ {
1901
+ "epoch": 1.04,
1902
+ "learning_rate": 0.00014598449390906804,
1903
+ "loss": 0.5146,
1904
+ "step": 28000
1905
+ },
1906
+ {
1907
+ "epoch": 1.04,
1908
+ "eval_loss": 0.5959522128105164,
1909
+ "eval_runtime": 1288.6066,
1910
+ "eval_samples_per_second": 0.421,
1911
+ "eval_steps_per_second": 0.421,
1912
+ "step": 28000
1913
+ },
1914
+ {
1915
+ "epoch": 1.05,
1916
+ "learning_rate": 0.00014563739120979497,
1917
+ "loss": 0.5262,
1918
+ "step": 28100
1919
+ },
1920
+ {
1921
+ "epoch": 1.05,
1922
+ "learning_rate": 0.00014528959262865798,
1923
+ "loss": 0.5082,
1924
+ "step": 28200
1925
+ },
1926
+ {
1927
+ "epoch": 1.06,
1928
+ "learning_rate": 0.00014494110346891206,
1929
+ "loss": 0.5094,
1930
+ "step": 28300
1931
+ },
1932
+ {
1933
+ "epoch": 1.06,
1934
+ "learning_rate": 0.00014459192904434226,
1935
+ "loss": 0.5012,
1936
+ "step": 28400
1937
+ },
1938
+ {
1939
+ "epoch": 1.06,
1940
+ "learning_rate": 0.0001442420746791826,
1941
+ "loss": 0.4946,
1942
+ "step": 28500
1943
+ },
1944
+ {
1945
+ "epoch": 1.07,
1946
+ "learning_rate": 0.00014389154570803477,
1947
+ "loss": 0.5138,
1948
+ "step": 28600
1949
+ },
1950
+ {
1951
+ "epoch": 1.07,
1952
+ "learning_rate": 0.000143540347475787,
1953
+ "loss": 0.5082,
1954
+ "step": 28700
1955
+ },
1956
+ {
1957
+ "epoch": 1.07,
1958
+ "learning_rate": 0.0001431884853375325,
1959
+ "loss": 0.4842,
1960
+ "step": 28800
1961
+ },
1962
+ {
1963
+ "epoch": 1.08,
1964
+ "learning_rate": 0.0001428359646584876,
1965
+ "loss": 0.5143,
1966
+ "step": 28900
1967
+ },
1968
+ {
1969
+ "epoch": 1.08,
1970
+ "learning_rate": 0.00014248279081391022,
1971
+ "loss": 0.5029,
1972
+ "step": 29000
1973
+ },
1974
+ {
1975
+ "epoch": 1.08,
1976
+ "eval_loss": 0.5910914540290833,
1977
+ "eval_runtime": 1278.8257,
1978
+ "eval_samples_per_second": 0.424,
1979
+ "eval_steps_per_second": 0.424,
1980
+ "step": 29000
1981
+ },
1982
+ {
1983
+ "epoch": 1.08,
1984
+ "learning_rate": 0.00014212896918901774,
1985
+ "loss": 0.5003,
1986
+ "step": 29100
1987
+ },
1988
+ {
1989
+ "epoch": 1.09,
1990
+ "learning_rate": 0.00014177450517890503,
1991
+ "loss": 0.5102,
1992
+ "step": 29200
1993
+ },
1994
+ {
1995
+ "epoch": 1.09,
1996
+ "learning_rate": 0.0001414194041884619,
1997
+ "loss": 0.524,
1998
+ "step": 29300
1999
+ },
2000
+ {
2001
+ "epoch": 1.1,
2002
+ "learning_rate": 0.0001410636716322911,
2003
+ "loss": 0.5168,
2004
+ "step": 29400
2005
+ },
2006
+ {
2007
+ "epoch": 1.1,
2008
+ "learning_rate": 0.0001407073129346254,
2009
+ "loss": 0.514,
2010
+ "step": 29500
2011
+ },
2012
+ {
2013
+ "epoch": 1.1,
2014
+ "learning_rate": 0.00014035033352924502,
2015
+ "loss": 0.5084,
2016
+ "step": 29600
2017
+ },
2018
+ {
2019
+ "epoch": 1.11,
2020
+ "learning_rate": 0.0001399927388593948,
2021
+ "loss": 0.5203,
2022
+ "step": 29700
2023
+ },
2024
+ {
2025
+ "epoch": 1.11,
2026
+ "learning_rate": 0.00013963453437770119,
2027
+ "loss": 0.5226,
2028
+ "step": 29800
2029
+ },
2030
+ {
2031
+ "epoch": 1.11,
2032
+ "learning_rate": 0.000139275725546089,
2033
+ "loss": 0.5055,
2034
+ "step": 29900
2035
+ },
2036
+ {
2037
+ "epoch": 1.12,
2038
+ "learning_rate": 0.00013891631783569838,
2039
+ "loss": 0.5303,
2040
+ "step": 30000
2041
+ },
2042
+ {
2043
+ "epoch": 1.12,
2044
+ "eval_loss": 0.5869857668876648,
2045
+ "eval_runtime": 1272.0282,
2046
+ "eval_samples_per_second": 0.426,
2047
+ "eval_steps_per_second": 0.426,
2048
+ "step": 30000
2049
  }
2050
  ],
2051
  "max_steps": 80463,
2052
  "num_train_epochs": 3,
2053
+ "total_flos": 8.413138321233592e+18,
2054
  "trial_name": null,
2055
  "trial_params": null
2056
  }
{checkpoint-26000 → checkpoint-30000}/training_args.bin RENAMED
File without changes