ZeroUniqueness committed on
Commit
cd23378
‒
1 Parent(s): db29ebd

Training in progress, step 34000

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d42cbea41e45d2d0b755f9666264a178377e0d657920d99d2ac36083b14a3fce
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7511e65b55d29a7e86c8b6b83356365ea688292ba79d0f3a40289fa0d34e9a8c
3
  size 500897101
{checkpoint-30000 → checkpoint-33000/adapter_model}/README.md RENAMED
File without changes
{checkpoint-30000 → checkpoint-33000/adapter_model}/adapter_config.json RENAMED
File without changes
{checkpoint-30000 → checkpoint-33000/adapter_model}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9537b3a7e6055deff50c06679b380e3a915c90d647696e0b31fc0265c367cbbc
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d42cbea41e45d2d0b755f9666264a178377e0d657920d99d2ac36083b14a3fce
3
  size 500897101
{checkpoint-30000/adapter_model → checkpoint-34000}/README.md RENAMED
File without changes
{checkpoint-30000/adapter_model → checkpoint-34000}/adapter_config.json RENAMED
File without changes
{checkpoint-30000/adapter_model → checkpoint-34000}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9537b3a7e6055deff50c06679b380e3a915c90d647696e0b31fc0265c367cbbc
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7511e65b55d29a7e86c8b6b83356365ea688292ba79d0f3a40289fa0d34e9a8c
3
  size 500897101
{checkpoint-30000 → checkpoint-34000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be1c4c976ca70fca137b1074a48111db76f8417f793dd8b169c29e76d799f173
3
  size 1001723453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed98569c7d57f25cd04cdbe01503f350ab8d1c53424912e6e1c8f906e815dfd9
3
  size 1001723453
{checkpoint-30000 → checkpoint-34000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5aed0374c0ea79a6b6022726183fe49008353437a1d89b31492e7e657ff784f
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c78019e6be9dcb9854fd190539da62f7f53c85d3563e7512184a250af881b96a
3
  size 14575
{checkpoint-30000 → checkpoint-34000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3860c8699ca30c96bb0a9da34e9e209f4fa8a3c56eb8f9c2ce426750974e9aa6
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1f76b1b4ddd5a5fd1441715b84be8e888cf97db173bbe93e49ec15bc19296ed
3
  size 627
{checkpoint-30000 → checkpoint-34000}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.5869857668876648,
3
- "best_model_checkpoint": "./qlora-out/checkpoint-30000",
4
- "epoch": 1.1185265277208158,
5
- "global_step": 30000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2046,11 +2046,283 @@
2046
  "eval_samples_per_second": 0.426,
2047
  "eval_steps_per_second": 0.426,
2048
  "step": 30000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2049
  }
2050
  ],
2051
  "max_steps": 80463,
2052
  "num_train_epochs": 3,
2053
- "total_flos": 8.413138321233592e+18,
2054
  "trial_name": null,
2055
  "trial_params": null
2056
  }
 
1
  {
2
+ "best_metric": 0.5665221810340881,
3
+ "best_model_checkpoint": "./qlora-out/checkpoint-34000",
4
+ "epoch": 1.2676633980835912,
5
+ "global_step": 34000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2046
  "eval_samples_per_second": 0.426,
2047
  "eval_steps_per_second": 0.426,
2048
  "step": 30000
2049
+ },
2050
+ {
2051
+ "epoch": 1.12,
2052
+ "learning_rate": 0.00013855631672680106,
2053
+ "loss": 0.5243,
2054
+ "step": 30100
2055
+ },
2056
+ {
2057
+ "epoch": 1.13,
2058
+ "learning_rate": 0.00013819572770871702,
2059
+ "loss": 0.5148,
2060
+ "step": 30200
2061
+ },
2062
+ {
2063
+ "epoch": 1.13,
2064
+ "learning_rate": 0.00013783455627973062,
2065
+ "loss": 0.522,
2066
+ "step": 30300
2067
+ },
2068
+ {
2069
+ "epoch": 1.13,
2070
+ "learning_rate": 0.00013747280794700707,
2071
+ "loss": 0.5289,
2072
+ "step": 30400
2073
+ },
2074
+ {
2075
+ "epoch": 1.14,
2076
+ "learning_rate": 0.00013711048822650802,
2077
+ "loss": 0.4996,
2078
+ "step": 30500
2079
+ },
2080
+ {
2081
+ "epoch": 1.14,
2082
+ "learning_rate": 0.00013674760264290785,
2083
+ "loss": 0.5099,
2084
+ "step": 30600
2085
+ },
2086
+ {
2087
+ "epoch": 1.14,
2088
+ "learning_rate": 0.0001363841567295091,
2089
+ "loss": 0.5219,
2090
+ "step": 30700
2091
+ },
2092
+ {
2093
+ "epoch": 1.15,
2094
+ "learning_rate": 0.00013602015602815837,
2095
+ "loss": 0.5297,
2096
+ "step": 30800
2097
+ },
2098
+ {
2099
+ "epoch": 1.15,
2100
+ "learning_rate": 0.00013565560608916165,
2101
+ "loss": 0.5029,
2102
+ "step": 30900
2103
+ },
2104
+ {
2105
+ "epoch": 1.16,
2106
+ "learning_rate": 0.0001352905124711998,
2107
+ "loss": 0.5266,
2108
+ "step": 31000
2109
+ },
2110
+ {
2111
+ "epoch": 1.16,
2112
+ "eval_loss": 0.5811149477958679,
2113
+ "eval_runtime": 1300.1475,
2114
+ "eval_samples_per_second": 0.417,
2115
+ "eval_steps_per_second": 0.417,
2116
+ "step": 31000
2117
+ },
2118
+ {
2119
+ "epoch": 1.16,
2120
+ "learning_rate": 0.00013492488074124366,
2121
+ "loss": 0.5295,
2122
+ "step": 31100
2123
+ },
2124
+ {
2125
+ "epoch": 1.16,
2126
+ "learning_rate": 0.00013455871647446923,
2127
+ "loss": 0.539,
2128
+ "step": 31200
2129
+ },
2130
+ {
2131
+ "epoch": 1.17,
2132
+ "learning_rate": 0.00013419202525417277,
2133
+ "loss": 0.5217,
2134
+ "step": 31300
2135
+ },
2136
+ {
2137
+ "epoch": 1.17,
2138
+ "learning_rate": 0.0001338248126716854,
2139
+ "loss": 0.5197,
2140
+ "step": 31400
2141
+ },
2142
+ {
2143
+ "epoch": 1.17,
2144
+ "learning_rate": 0.00013345708432628824,
2145
+ "loss": 0.4991,
2146
+ "step": 31500
2147
+ },
2148
+ {
2149
+ "epoch": 1.18,
2150
+ "learning_rate": 0.00013308884582512647,
2151
+ "loss": 0.5239,
2152
+ "step": 31600
2153
+ },
2154
+ {
2155
+ "epoch": 1.18,
2156
+ "learning_rate": 0.00013272010278312453,
2157
+ "loss": 0.4899,
2158
+ "step": 31700
2159
+ },
2160
+ {
2161
+ "epoch": 1.19,
2162
+ "learning_rate": 0.00013235086082289977,
2163
+ "loss": 0.5088,
2164
+ "step": 31800
2165
+ },
2166
+ {
2167
+ "epoch": 1.19,
2168
+ "learning_rate": 0.00013198112557467732,
2169
+ "loss": 0.5497,
2170
+ "step": 31900
2171
+ },
2172
+ {
2173
+ "epoch": 1.19,
2174
+ "learning_rate": 0.00013161090267620396,
2175
+ "loss": 0.5024,
2176
+ "step": 32000
2177
+ },
2178
+ {
2179
+ "epoch": 1.19,
2180
+ "eval_loss": 0.5758991241455078,
2181
+ "eval_runtime": 1292.3362,
2182
+ "eval_samples_per_second": 0.419,
2183
+ "eval_steps_per_second": 0.419,
2184
+ "step": 32000
2185
+ },
2186
+ {
2187
+ "epoch": 1.2,
2188
+ "learning_rate": 0.0001312401977726621,
2189
+ "loss": 0.534,
2190
+ "step": 32100
2191
+ },
2192
+ {
2193
+ "epoch": 1.2,
2194
+ "learning_rate": 0.0001308690165165839,
2195
+ "loss": 0.4936,
2196
+ "step": 32200
2197
+ },
2198
+ {
2199
+ "epoch": 1.2,
2200
+ "learning_rate": 0.00013049736456776485,
2201
+ "loss": 0.4999,
2202
+ "step": 32300
2203
+ },
2204
+ {
2205
+ "epoch": 1.21,
2206
+ "learning_rate": 0.00013012524759317774,
2207
+ "loss": 0.5238,
2208
+ "step": 32400
2209
+ },
2210
+ {
2211
+ "epoch": 1.21,
2212
+ "learning_rate": 0.000129752671266886,
2213
+ "loss": 0.4959,
2214
+ "step": 32500
2215
+ },
2216
+ {
2217
+ "epoch": 1.22,
2218
+ "learning_rate": 0.00012937964126995727,
2219
+ "loss": 0.514,
2220
+ "step": 32600
2221
+ },
2222
+ {
2223
+ "epoch": 1.22,
2224
+ "learning_rate": 0.00012900616329037694,
2225
+ "loss": 0.4964,
2226
+ "step": 32700
2227
+ },
2228
+ {
2229
+ "epoch": 1.22,
2230
+ "learning_rate": 0.00012863224302296107,
2231
+ "loss": 0.5054,
2232
+ "step": 32800
2233
+ },
2234
+ {
2235
+ "epoch": 1.23,
2236
+ "learning_rate": 0.0001282578861692699,
2237
+ "loss": 0.5079,
2238
+ "step": 32900
2239
+ },
2240
+ {
2241
+ "epoch": 1.23,
2242
+ "learning_rate": 0.0001278830984375206,
2243
+ "loss": 0.4929,
2244
+ "step": 33000
2245
+ },
2246
+ {
2247
+ "epoch": 1.23,
2248
+ "eval_loss": 0.5719351172447205,
2249
+ "eval_runtime": 1267.7603,
2250
+ "eval_samples_per_second": 0.428,
2251
+ "eval_steps_per_second": 0.428,
2252
+ "step": 33000
2253
+ },
2254
+ {
2255
+ "epoch": 1.23,
2256
+ "learning_rate": 0.0001275078855425007,
2257
+ "loss": 0.4971,
2258
+ "step": 33100
2259
+ },
2260
+ {
2261
+ "epoch": 1.24,
2262
+ "learning_rate": 0.0001271322532054803,
2263
+ "loss": 0.4977,
2264
+ "step": 33200
2265
+ },
2266
+ {
2267
+ "epoch": 1.24,
2268
+ "learning_rate": 0.0001267562071541254,
2269
+ "loss": 0.499,
2270
+ "step": 33300
2271
+ },
2272
+ {
2273
+ "epoch": 1.25,
2274
+ "learning_rate": 0.00012637975312241022,
2275
+ "loss": 0.5044,
2276
+ "step": 33400
2277
+ },
2278
+ {
2279
+ "epoch": 1.25,
2280
+ "learning_rate": 0.00012600289685052996,
2281
+ "loss": 0.5019,
2282
+ "step": 33500
2283
+ },
2284
+ {
2285
+ "epoch": 1.25,
2286
+ "learning_rate": 0.00012562564408481327,
2287
+ "loss": 0.5225,
2288
+ "step": 33600
2289
+ },
2290
+ {
2291
+ "epoch": 1.26,
2292
+ "learning_rate": 0.00012524800057763438,
2293
+ "loss": 0.5503,
2294
+ "step": 33700
2295
+ },
2296
+ {
2297
+ "epoch": 1.26,
2298
+ "learning_rate": 0.00012486997208732573,
2299
+ "loss": 0.5025,
2300
+ "step": 33800
2301
+ },
2302
+ {
2303
+ "epoch": 1.26,
2304
+ "learning_rate": 0.0001244915643780899,
2305
+ "loss": 0.5187,
2306
+ "step": 33900
2307
+ },
2308
+ {
2309
+ "epoch": 1.27,
2310
+ "learning_rate": 0.00012411278321991195,
2311
+ "loss": 0.5199,
2312
+ "step": 34000
2313
+ },
2314
+ {
2315
+ "epoch": 1.27,
2316
+ "eval_loss": 0.5665221810340881,
2317
+ "eval_runtime": 1263.0264,
2318
+ "eval_samples_per_second": 0.429,
2319
+ "eval_steps_per_second": 0.429,
2320
+ "step": 34000
2321
  }
2322
  ],
2323
  "max_steps": 80463,
2324
  "num_train_epochs": 3,
2325
+ "total_flos": 9.535353877436006e+18,
2326
  "trial_name": null,
2327
  "trial_params": null
2328
  }
{checkpoint-30000 → checkpoint-34000}/training_args.bin RENAMED
File without changes