ZeroUniqueness committed on
Commit a024d53 · 1 Parent(s): d56baf3

Training in progress, step 32000

adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aab37ed994bf1726713cdc1d84b9560a915246c4a4028a868388ed725770a29f
+oid sha256:1cd53296eb8a21ba14a04ebafc8d98535866046120ab70e3a5896c0601a199c0
 size 500897101
{checkpoint-28000 → checkpoint-31000/adapter_model}/README.md RENAMED
File without changes
{checkpoint-28000 → checkpoint-31000/adapter_model}/adapter_config.json RENAMED
File without changes
{checkpoint-28000 → checkpoint-31000/adapter_model}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:196c1d0349fe865fe652da8fb5f0318dc7fea1691a4f6b9ce949ba1d8fc568d2
+oid sha256:aab37ed994bf1726713cdc1d84b9560a915246c4a4028a868388ed725770a29f
 size 500897101
{checkpoint-28000/adapter_model → checkpoint-32000}/README.md RENAMED
File without changes
{checkpoint-28000/adapter_model → checkpoint-32000}/adapter_config.json RENAMED
File without changes
{checkpoint-28000/adapter_model → checkpoint-32000}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:196c1d0349fe865fe652da8fb5f0318dc7fea1691a4f6b9ce949ba1d8fc568d2
+oid sha256:1cd53296eb8a21ba14a04ebafc8d98535866046120ab70e3a5896c0601a199c0
 size 500897101
{checkpoint-28000 → checkpoint-32000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:04d78b2c094992c84dfe56222bca52d649375e78ba1b620b3066065ac043d46c
+oid sha256:3237a6e7d87456a67a6d80fd741d50e0bff0a09db16b732bfb6a6221adc94de0
 size 1001723453
{checkpoint-28000 → checkpoint-32000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a4463358c0b4ae7957b6ac92cc6e7a3966deed18d8592e7b96d0b5329e30e9c3
+oid sha256:97f4034592e6327496d0e86e02fdeb322ac2850e71e98c7091c4e86f80ea32c4
 size 14575
{checkpoint-28000 → checkpoint-32000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f454f0dd53d23ae46c76eed24d3b8210f5bc3588acc88def4db4574c3744dffb
+oid sha256:25644fa9672216c7372c64ae67805d0d353dcf7228893c13f2f02bf1f0c031e7
 size 627
{checkpoint-28000 → checkpoint-32000}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
 {
-  "best_metric": 0.5959522128105164,
-  "best_model_checkpoint": "./qlora-out/checkpoint-28000",
-  "epoch": 1.043958092539428,
-  "global_step": 28000,
+  "best_metric": 0.5758991241455078,
+  "best_model_checkpoint": "./qlora-out/checkpoint-32000",
+  "epoch": 1.1930949629022034,
+  "global_step": 32000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1910,11 +1910,283 @@
       "eval_samples_per_second": 0.421,
       "eval_steps_per_second": 0.421,
       "step": 28000
+    },
+    {
+      "epoch": 1.05,
+      "learning_rate": 0.00014563739120979497,
+      "loss": 0.5262,
+      "step": 28100
+    },
+    {
+      "epoch": 1.05,
+      "learning_rate": 0.00014528959262865798,
+      "loss": 0.5082,
+      "step": 28200
+    },
+    {
+      "epoch": 1.06,
+      "learning_rate": 0.00014494110346891206,
+      "loss": 0.5094,
+      "step": 28300
+    },
+    {
+      "epoch": 1.06,
+      "learning_rate": 0.00014459192904434226,
+      "loss": 0.5012,
+      "step": 28400
+    },
+    {
+      "epoch": 1.06,
+      "learning_rate": 0.0001442420746791826,
+      "loss": 0.4946,
+      "step": 28500
+    },
+    {
+      "epoch": 1.07,
+      "learning_rate": 0.00014389154570803477,
+      "loss": 0.5138,
+      "step": 28600
+    },
+    {
+      "epoch": 1.07,
+      "learning_rate": 0.000143540347475787,
+      "loss": 0.5082,
+      "step": 28700
+    },
+    {
+      "epoch": 1.07,
+      "learning_rate": 0.0001431884853375325,
+      "loss": 0.4842,
+      "step": 28800
+    },
+    {
+      "epoch": 1.08,
+      "learning_rate": 0.0001428359646584876,
+      "loss": 0.5143,
+      "step": 28900
+    },
+    {
+      "epoch": 1.08,
+      "learning_rate": 0.00014248279081391022,
+      "loss": 0.5029,
+      "step": 29000
+    },
+    {
+      "epoch": 1.08,
+      "eval_loss": 0.5910914540290833,
+      "eval_runtime": 1278.8257,
+      "eval_samples_per_second": 0.424,
+      "eval_steps_per_second": 0.424,
+      "step": 29000
+    },
+    {
+      "epoch": 1.08,
+      "learning_rate": 0.00014212896918901774,
+      "loss": 0.5003,
+      "step": 29100
+    },
+    {
+      "epoch": 1.09,
+      "learning_rate": 0.00014177450517890503,
+      "loss": 0.5102,
+      "step": 29200
+    },
+    {
+      "epoch": 1.09,
+      "learning_rate": 0.0001414194041884619,
+      "loss": 0.524,
+      "step": 29300
+    },
+    {
+      "epoch": 1.1,
+      "learning_rate": 0.0001410636716322911,
+      "loss": 0.5168,
+      "step": 29400
+    },
+    {
+      "epoch": 1.1,
+      "learning_rate": 0.0001407073129346254,
+      "loss": 0.514,
+      "step": 29500
+    },
+    {
+      "epoch": 1.1,
+      "learning_rate": 0.00014035033352924502,
+      "loss": 0.5084,
+      "step": 29600
+    },
+    {
+      "epoch": 1.11,
+      "learning_rate": 0.0001399927388593948,
+      "loss": 0.5203,
+      "step": 29700
+    },
+    {
+      "epoch": 1.11,
+      "learning_rate": 0.00013963453437770119,
+      "loss": 0.5226,
+      "step": 29800
+    },
+    {
+      "epoch": 1.11,
+      "learning_rate": 0.000139275725546089,
+      "loss": 0.5055,
+      "step": 29900
+    },
+    {
+      "epoch": 1.12,
+      "learning_rate": 0.00013891631783569838,
+      "loss": 0.5303,
+      "step": 30000
+    },
+    {
+      "epoch": 1.12,
+      "eval_loss": 0.5869857668876648,
+      "eval_runtime": 1272.0282,
+      "eval_samples_per_second": 0.426,
+      "eval_steps_per_second": 0.426,
+      "step": 30000
+    },
+    {
+      "epoch": 1.12,
+      "learning_rate": 0.00013855631672680106,
+      "loss": 0.5243,
+      "step": 30100
+    },
+    {
+      "epoch": 1.13,
+      "learning_rate": 0.00013819572770871702,
+      "loss": 0.5148,
+      "step": 30200
+    },
+    {
+      "epoch": 1.13,
+      "learning_rate": 0.00013783455627973062,
+      "loss": 0.522,
+      "step": 30300
+    },
+    {
+      "epoch": 1.13,
+      "learning_rate": 0.00013747280794700707,
+      "loss": 0.5289,
+      "step": 30400
+    },
+    {
+      "epoch": 1.14,
+      "learning_rate": 0.00013711048822650802,
+      "loss": 0.4996,
+      "step": 30500
+    },
+    {
+      "epoch": 1.14,
+      "learning_rate": 0.00013674760264290785,
+      "loss": 0.5099,
+      "step": 30600
+    },
+    {
+      "epoch": 1.14,
+      "learning_rate": 0.0001363841567295091,
+      "loss": 0.5219,
+      "step": 30700
+    },
+    {
+      "epoch": 1.15,
+      "learning_rate": 0.00013602015602815837,
+      "loss": 0.5297,
+      "step": 30800
+    },
+    {
+      "epoch": 1.15,
+      "learning_rate": 0.00013565560608916165,
+      "loss": 0.5029,
+      "step": 30900
+    },
+    {
+      "epoch": 1.16,
+      "learning_rate": 0.0001352905124711998,
+      "loss": 0.5266,
+      "step": 31000
+    },
+    {
+      "epoch": 1.16,
+      "eval_loss": 0.5811149477958679,
+      "eval_runtime": 1300.1475,
+      "eval_samples_per_second": 0.417,
+      "eval_steps_per_second": 0.417,
+      "step": 31000
+    },
+    {
+      "epoch": 1.16,
+      "learning_rate": 0.00013492488074124366,
+      "loss": 0.5295,
+      "step": 31100
+    },
+    {
+      "epoch": 1.16,
+      "learning_rate": 0.00013455871647446923,
+      "loss": 0.539,
+      "step": 31200
+    },
+    {
+      "epoch": 1.17,
+      "learning_rate": 0.00013419202525417277,
+      "loss": 0.5217,
+      "step": 31300
+    },
+    {
+      "epoch": 1.17,
+      "learning_rate": 0.0001338248126716854,
+      "loss": 0.5197,
+      "step": 31400
+    },
+    {
+      "epoch": 1.17,
+      "learning_rate": 0.00013345708432628824,
+      "loss": 0.4991,
+      "step": 31500
+    },
+    {
+      "epoch": 1.18,
+      "learning_rate": 0.00013308884582512647,
+      "loss": 0.5239,
+      "step": 31600
+    },
+    {
+      "epoch": 1.18,
+      "learning_rate": 0.00013272010278312453,
+      "loss": 0.4899,
+      "step": 31700
+    },
+    {
+      "epoch": 1.19,
+      "learning_rate": 0.00013235086082289977,
+      "loss": 0.5088,
+      "step": 31800
+    },
+    {
+      "epoch": 1.19,
+      "learning_rate": 0.00013198112557467732,
+      "loss": 0.5497,
+      "step": 31900
+    },
+    {
+      "epoch": 1.19,
+      "learning_rate": 0.00013161090267620396,
+      "loss": 0.5024,
+      "step": 32000
+    },
+    {
+      "epoch": 1.19,
+      "eval_loss": 0.5758991241455078,
+      "eval_runtime": 1292.3362,
+      "eval_samples_per_second": 0.419,
+      "eval_steps_per_second": 0.419,
+      "step": 32000
     }
   ],
   "max_steps": 80463,
   "num_train_epochs": 3,
-  "total_flos": 7.852151375567094e+18,
+  "total_flos": 8.975876780005048e+18,
   "trial_name": null,
   "trial_params": null
 }
{checkpoint-28000 → checkpoint-32000}/training_args.bin RENAMED
File without changes
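
The trainer_state.json above is the Hugging Face Trainer log for this run: one entry per 100 training steps with the running loss and learning rate, plus an evaluation entry with eval_loss every 1000 steps. A minimal sketch for reading the eval-loss curve back out of such a file, assuming the entries sit under the Trainer's standard "log_history" key (the key itself falls outside the diff context shown) and using a hypothetical local path:

import json

# Hypothetical local path; point this at the downloaded checkpoint.
STATE_PATH = "qlora-out/checkpoint-32000/trainer_state.json"

with open(STATE_PATH) as f:
    state = json.load(f)

# Evaluation entries carry "eval_loss"; plain training steps only log "loss".
eval_curve = [
    (entry["step"], entry["eval_loss"])
    for entry in state.get("log_history", [])
    if "eval_loss" in entry
]

for step, loss in eval_curve:
    print(f"step {step:>6}  eval_loss {loss:.4f}")

The final evaluation entry (0.5758991241455078 at step 32000) matches the updated best_metric and best_model_checkpoint in the diff above.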