ZeroUniqueness committed on
Commit
35e0f32
•
1 Parent(s): cd23378

Training in progress, step 35000

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7511e65b55d29a7e86c8b6b83356365ea688292ba79d0f3a40289fa0d34e9a8c
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bcaf4cf9f9bc20718f7e19c06c7145a78a3a5c6c1ca1a845231e7f1741b7f0e
3
  size 500897101
{checkpoint-31000 → checkpoint-34000/adapter_model}/README.md RENAMED
File without changes
{checkpoint-31000 → checkpoint-34000/adapter_model}/adapter_config.json RENAMED
File without changes
{checkpoint-31000 → checkpoint-34000/adapter_model}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aab37ed994bf1726713cdc1d84b9560a915246c4a4028a868388ed725770a29f
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7511e65b55d29a7e86c8b6b83356365ea688292ba79d0f3a40289fa0d34e9a8c
3
  size 500897101
{checkpoint-31000/adapter_model → checkpoint-35000}/README.md RENAMED
File without changes
{checkpoint-31000/adapter_model → checkpoint-35000}/adapter_config.json RENAMED
File without changes
{checkpoint-31000/adapter_model → checkpoint-35000}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aab37ed994bf1726713cdc1d84b9560a915246c4a4028a868388ed725770a29f
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bcaf4cf9f9bc20718f7e19c06c7145a78a3a5c6c1ca1a845231e7f1741b7f0e
3
  size 500897101
{checkpoint-31000 → checkpoint-35000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6744b3d4cba5318614d3585f0a827cdfe30350a5eee8ea7ce4ae59a497f8b7d4
3
  size 1001723453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:021299a59b6ef922c41c510f3f8b93b7333417d0f6d05edbc3721b651b76908a
3
  size 1001723453
{checkpoint-31000 → checkpoint-35000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d969796cab52300f6ffd6656a35cf7978e5664a3693183be68bcb0ae74d67a0
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e07a8bcb98397480852e842446a8ddbd7b1c726fe43695a8f26d8722c832cf30
3
  size 14575
{checkpoint-31000 → checkpoint-35000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:328b81c248c0e7203f70f9754413682ae0e11fa74f4beec41dcb9e104d993f01
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70880cdf77014daa95aad41e2ce4c4dffd35eef0882e2600a10e5ef00bca6a86
3
  size 627
{checkpoint-31000 → checkpoint-35000}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.5811149477958679,
3
- "best_model_checkpoint": "./qlora-out/checkpoint-31000",
4
- "epoch": 1.1558107453115096,
5
- "global_step": 31000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2114,11 +2114,283 @@
2114
  "eval_samples_per_second": 0.417,
2115
  "eval_steps_per_second": 0.417,
2116
  "step": 31000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2117
  }
2118
  ],
2119
  "max_steps": 80463,
2120
  "num_train_epochs": 3,
2121
- "total_flos": 8.693892403678986e+18,
2122
  "trial_name": null,
2123
  "trial_params": null
2124
  }
 
1
  {
2
+ "best_metric": 0.562954843044281,
3
+ "best_model_checkpoint": "./qlora-out/checkpoint-35000",
4
+ "epoch": 1.3049476156742852,
5
+ "global_step": 35000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2114
  "eval_samples_per_second": 0.417,
2115
  "eval_steps_per_second": 0.417,
2116
  "step": 31000
2117
+ },
2118
+ {
2119
+ "epoch": 1.16,
2120
+ "learning_rate": 0.00013492488074124366,
2121
+ "loss": 0.5295,
2122
+ "step": 31100
2123
+ },
2124
+ {
2125
+ "epoch": 1.16,
2126
+ "learning_rate": 0.00013455871647446923,
2127
+ "loss": 0.539,
2128
+ "step": 31200
2129
+ },
2130
+ {
2131
+ "epoch": 1.17,
2132
+ "learning_rate": 0.00013419202525417277,
2133
+ "loss": 0.5217,
2134
+ "step": 31300
2135
+ },
2136
+ {
2137
+ "epoch": 1.17,
2138
+ "learning_rate": 0.0001338248126716854,
2139
+ "loss": 0.5197,
2140
+ "step": 31400
2141
+ },
2142
+ {
2143
+ "epoch": 1.17,
2144
+ "learning_rate": 0.00013345708432628824,
2145
+ "loss": 0.4991,
2146
+ "step": 31500
2147
+ },
2148
+ {
2149
+ "epoch": 1.18,
2150
+ "learning_rate": 0.00013308884582512647,
2151
+ "loss": 0.5239,
2152
+ "step": 31600
2153
+ },
2154
+ {
2155
+ "epoch": 1.18,
2156
+ "learning_rate": 0.00013272010278312453,
2157
+ "loss": 0.4899,
2158
+ "step": 31700
2159
+ },
2160
+ {
2161
+ "epoch": 1.19,
2162
+ "learning_rate": 0.00013235086082289977,
2163
+ "loss": 0.5088,
2164
+ "step": 31800
2165
+ },
2166
+ {
2167
+ "epoch": 1.19,
2168
+ "learning_rate": 0.00013198112557467732,
2169
+ "loss": 0.5497,
2170
+ "step": 31900
2171
+ },
2172
+ {
2173
+ "epoch": 1.19,
2174
+ "learning_rate": 0.00013161090267620396,
2175
+ "loss": 0.5024,
2176
+ "step": 32000
2177
+ },
2178
+ {
2179
+ "epoch": 1.19,
2180
+ "eval_loss": 0.5758991241455078,
2181
+ "eval_runtime": 1292.3362,
2182
+ "eval_samples_per_second": 0.419,
2183
+ "eval_steps_per_second": 0.419,
2184
+ "step": 32000
2185
+ },
2186
+ {
2187
+ "epoch": 1.2,
2188
+ "learning_rate": 0.0001312401977726621,
2189
+ "loss": 0.534,
2190
+ "step": 32100
2191
+ },
2192
+ {
2193
+ "epoch": 1.2,
2194
+ "learning_rate": 0.0001308690165165839,
2195
+ "loss": 0.4936,
2196
+ "step": 32200
2197
+ },
2198
+ {
2199
+ "epoch": 1.2,
2200
+ "learning_rate": 0.00013049736456776485,
2201
+ "loss": 0.4999,
2202
+ "step": 32300
2203
+ },
2204
+ {
2205
+ "epoch": 1.21,
2206
+ "learning_rate": 0.00013012524759317774,
2207
+ "loss": 0.5238,
2208
+ "step": 32400
2209
+ },
2210
+ {
2211
+ "epoch": 1.21,
2212
+ "learning_rate": 0.000129752671266886,
2213
+ "loss": 0.4959,
2214
+ "step": 32500
2215
+ },
2216
+ {
2217
+ "epoch": 1.22,
2218
+ "learning_rate": 0.00012937964126995727,
2219
+ "loss": 0.514,
2220
+ "step": 32600
2221
+ },
2222
+ {
2223
+ "epoch": 1.22,
2224
+ "learning_rate": 0.00012900616329037694,
2225
+ "loss": 0.4964,
2226
+ "step": 32700
2227
+ },
2228
+ {
2229
+ "epoch": 1.22,
2230
+ "learning_rate": 0.00012863224302296107,
2231
+ "loss": 0.5054,
2232
+ "step": 32800
2233
+ },
2234
+ {
2235
+ "epoch": 1.23,
2236
+ "learning_rate": 0.0001282578861692699,
2237
+ "loss": 0.5079,
2238
+ "step": 32900
2239
+ },
2240
+ {
2241
+ "epoch": 1.23,
2242
+ "learning_rate": 0.0001278830984375206,
2243
+ "loss": 0.4929,
2244
+ "step": 33000
2245
+ },
2246
+ {
2247
+ "epoch": 1.23,
2248
+ "eval_loss": 0.5719351172447205,
2249
+ "eval_runtime": 1267.7603,
2250
+ "eval_samples_per_second": 0.428,
2251
+ "eval_steps_per_second": 0.428,
2252
+ "step": 33000
2253
+ },
2254
+ {
2255
+ "epoch": 1.23,
2256
+ "learning_rate": 0.0001275078855425007,
2257
+ "loss": 0.4971,
2258
+ "step": 33100
2259
+ },
2260
+ {
2261
+ "epoch": 1.24,
2262
+ "learning_rate": 0.0001271322532054803,
2263
+ "loss": 0.4977,
2264
+ "step": 33200
2265
+ },
2266
+ {
2267
+ "epoch": 1.24,
2268
+ "learning_rate": 0.0001267562071541254,
2269
+ "loss": 0.499,
2270
+ "step": 33300
2271
+ },
2272
+ {
2273
+ "epoch": 1.25,
2274
+ "learning_rate": 0.00012637975312241022,
2275
+ "loss": 0.5044,
2276
+ "step": 33400
2277
+ },
2278
+ {
2279
+ "epoch": 1.25,
2280
+ "learning_rate": 0.00012600289685052996,
2281
+ "loss": 0.5019,
2282
+ "step": 33500
2283
+ },
2284
+ {
2285
+ "epoch": 1.25,
2286
+ "learning_rate": 0.00012562564408481327,
2287
+ "loss": 0.5225,
2288
+ "step": 33600
2289
+ },
2290
+ {
2291
+ "epoch": 1.26,
2292
+ "learning_rate": 0.00012524800057763438,
2293
+ "loss": 0.5503,
2294
+ "step": 33700
2295
+ },
2296
+ {
2297
+ "epoch": 1.26,
2298
+ "learning_rate": 0.00012486997208732573,
2299
+ "loss": 0.5025,
2300
+ "step": 33800
2301
+ },
2302
+ {
2303
+ "epoch": 1.26,
2304
+ "learning_rate": 0.0001244915643780899,
2305
+ "loss": 0.5187,
2306
+ "step": 33900
2307
+ },
2308
+ {
2309
+ "epoch": 1.27,
2310
+ "learning_rate": 0.00012411278321991195,
2311
+ "loss": 0.5199,
2312
+ "step": 34000
2313
+ },
2314
+ {
2315
+ "epoch": 1.27,
2316
+ "eval_loss": 0.5665221810340881,
2317
+ "eval_runtime": 1263.0264,
2318
+ "eval_samples_per_second": 0.429,
2319
+ "eval_steps_per_second": 0.429,
2320
+ "step": 34000
2321
+ },
2322
+ {
2323
+ "epoch": 1.27,
2324
+ "learning_rate": 0.00012373363438847117,
2325
+ "loss": 0.5135,
2326
+ "step": 34100
2327
+ },
2328
+ {
2329
+ "epoch": 1.28,
2330
+ "learning_rate": 0.00012335412366505324,
2331
+ "loss": 0.5065,
2332
+ "step": 34200
2333
+ },
2334
+ {
2335
+ "epoch": 1.28,
2336
+ "learning_rate": 0.000122974256836462,
2337
+ "loss": 0.5223,
2338
+ "step": 34300
2339
+ },
2340
+ {
2341
+ "epoch": 1.28,
2342
+ "learning_rate": 0.00012259403969493114,
2343
+ "loss": 0.4946,
2344
+ "step": 34400
2345
+ },
2346
+ {
2347
+ "epoch": 1.29,
2348
+ "learning_rate": 0.00012221347803803605,
2349
+ "loss": 0.5105,
2350
+ "step": 34500
2351
+ },
2352
+ {
2353
+ "epoch": 1.29,
2354
+ "learning_rate": 0.00012183257766860514,
2355
+ "loss": 0.4812,
2356
+ "step": 34600
2357
+ },
2358
+ {
2359
+ "epoch": 1.29,
2360
+ "learning_rate": 0.00012145134439463178,
2361
+ "loss": 0.4981,
2362
+ "step": 34700
2363
+ },
2364
+ {
2365
+ "epoch": 1.3,
2366
+ "learning_rate": 0.0001210697840291852,
2367
+ "loss": 0.5038,
2368
+ "step": 34800
2369
+ },
2370
+ {
2371
+ "epoch": 1.3,
2372
+ "learning_rate": 0.00012068790239032241,
2373
+ "loss": 0.5551,
2374
+ "step": 34900
2375
+ },
2376
+ {
2377
+ "epoch": 1.3,
2378
+ "learning_rate": 0.00012030570530099902,
2379
+ "loss": 0.4964,
2380
+ "step": 35000
2381
+ },
2382
+ {
2383
+ "epoch": 1.3,
2384
+ "eval_loss": 0.562954843044281,
2385
+ "eval_runtime": 1252.1434,
2386
+ "eval_samples_per_second": 0.433,
2387
+ "eval_steps_per_second": 0.433,
2388
+ "step": 35000
2389
  }
2390
  ],
2391
  "max_steps": 80463,
2392
  "num_train_epochs": 3,
2393
+ "total_flos": 9.815104047716352e+18,
2394
  "trial_name": null,
2395
  "trial_params": null
2396
  }
{checkpoint-31000 → checkpoint-35000}/training_args.bin RENAMED
File without changes