TracyTank committed on
Commit 8557200
1 parent: 9af8f1c

Training in progress, step 50, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3509006e05941335e4fedcccc62b6832c8759f0f916e4e9a846259e72704b21d
+oid sha256:385d8ae5b31ac1044bd05487dee0344d9bb0535cf0ce71bb97a06b8f8e2e9bc2
 size 201361312
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:00b98fc13970f172c1ed29b9c10b8b2fab993bc701f5cbfc3cd423e7b49ae0e1
+oid sha256:5194b987daaeac7e7a7c5eb439efe6e2e158c3c86dcd9b0fa46a06505a58a80c
 size 402868986
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98ccd11b00bae3ef4bff0ec9d8a4986ff4b41a3d490090c8bc6df9ebdefb9df5
+oid sha256:93fdba768be06ecb8d82beabc762980b75e4fa86779fa23b425a671cbaeb6a53
 size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b78362f5c4037f0720a9a9b1662f4ed2a533a35224c64e3bc01c30ecdcb5c45c
+oid sha256:d77a9f61cd76d47af6f298eb031e99333cdc3de06b4afd8cf98603a3262646be
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa14929c99d7e7deee3d9367cf63b13218efd66c131ea7746a041755a61d83ed
+oid sha256:b46a016ca20d80bbf46c87a0b3176f96735503cb591437a6e800b470845d4e5b
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:372e8783649c8077b23efca291d26437d974a10ece7beca379e4196cdeb4020a
+oid sha256:3ac2bcc26c39f904b42cb54269d75b9fc51ac51e591642a9f442286b9a0964ff
 size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d271cdb95f63cd655315f063ca2e25c78dc5ae4275523c5d4f80f367586b3351
+oid sha256:5607f6de446164d9d9adb8b91c44cec55b14aa391e24ba5637c08b834eedda2a
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.9972949028015137,
-  "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.970873786407767,
+  "best_metric": 0.9236457347869873,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 1.941747572815534,
   "eval_steps": 25,
-  "global_step": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 47.238,
       "eval_steps_per_second": 12.282,
       "step": 25
+    },
+    {
+      "epoch": 1.0097087378640777,
+      "grad_norm": 25.719419479370117,
+      "learning_rate": 5.500000000000001e-05,
+      "loss": 16.4112,
+      "step": 26
+    },
+    {
+      "epoch": 1.0485436893203883,
+      "grad_norm": 29.739490509033203,
+      "learning_rate": 5.205685918464356e-05,
+      "loss": 13.7014,
+      "step": 27
+    },
+    {
+      "epoch": 1.087378640776699,
+      "grad_norm": 34.622406005859375,
+      "learning_rate": 4.912632135009769e-05,
+      "loss": 14.9217,
+      "step": 28
+    },
+    {
+      "epoch": 1.1262135922330097,
+      "grad_norm": 30.102741241455078,
+      "learning_rate": 4.6220935509274235e-05,
+      "loss": 17.4172,
+      "step": 29
+    },
+    {
+      "epoch": 1.1650485436893203,
+      "grad_norm": 11.93884563446045,
+      "learning_rate": 4.3353142970386564e-05,
+      "loss": 14.4702,
+      "step": 30
+    },
+    {
+      "epoch": 1.203883495145631,
+      "grad_norm": 6.051325798034668,
+      "learning_rate": 4.053522406135775e-05,
+      "loss": 18.1343,
+      "step": 31
+    },
+    {
+      "epoch": 1.2427184466019416,
+      "grad_norm": 21.685884475708008,
+      "learning_rate": 3.777924554357096e-05,
+      "loss": 18.9043,
+      "step": 32
+    },
+    {
+      "epoch": 1.2815533980582523,
+      "grad_norm": 103.92913055419922,
+      "learning_rate": 3.509700894014496e-05,
+      "loss": 17.5716,
+      "step": 33
+    },
+    {
+      "epoch": 1.3203883495145632,
+      "grad_norm": 41.143226623535156,
+      "learning_rate": 3.250000000000001e-05,
+      "loss": 16.5813,
+      "step": 34
+    },
+    {
+      "epoch": 1.3592233009708738,
+      "grad_norm": 13.322393417358398,
+      "learning_rate": 2.9999339514117912e-05,
+      "loss": 17.0534,
+      "step": 35
+    },
+    {
+      "epoch": 1.3980582524271845,
+      "grad_norm": 26.480480194091797,
+      "learning_rate": 2.760573569460757e-05,
+      "loss": 14.6343,
+      "step": 36
+    },
+    {
+      "epoch": 1.4368932038834952,
+      "grad_norm": 4.912123680114746,
+      "learning_rate": 2.53294383204969e-05,
+      "loss": 17.8308,
+      "step": 37
+    },
+    {
+      "epoch": 1.4757281553398058,
+      "grad_norm": 7.7494282722473145,
+      "learning_rate": 2.3180194846605367e-05,
+      "loss": 18.1934,
+      "step": 38
+    },
+    {
+      "epoch": 1.5145631067961165,
+      "grad_norm": 28.394657135009766,
+      "learning_rate": 2.1167208663446025e-05,
+      "loss": 15.4537,
+      "step": 39
+    },
+    {
+      "epoch": 1.5533980582524272,
+      "grad_norm": 29.47159767150879,
+      "learning_rate": 1.9299099686894423e-05,
+      "loss": 12.7481,
+      "step": 40
+    },
+    {
+      "epoch": 1.5922330097087378,
+      "grad_norm": 23.724668502807617,
+      "learning_rate": 1.758386744638546e-05,
+      "loss": 16.9,
+      "step": 41
+    },
+    {
+      "epoch": 1.6310679611650487,
+      "grad_norm": 23.437341690063477,
+      "learning_rate": 1.602885682970026e-05,
+      "loss": 15.56,
+      "step": 42
+    },
+    {
+      "epoch": 1.6699029126213594,
+      "grad_norm": 17.16952133178711,
+      "learning_rate": 1.464072663102903e-05,
+      "loss": 16.7025,
+      "step": 43
+    },
+    {
+      "epoch": 1.70873786407767,
+      "grad_norm": 16.27042579650879,
+      "learning_rate": 1.3425421036992098e-05,
+      "loss": 17.9029,
+      "step": 44
+    },
+    {
+      "epoch": 1.7475728155339807,
+      "grad_norm": 16.81924057006836,
+      "learning_rate": 1.2388144172720251e-05,
+      "loss": 14.544,
+      "step": 45
+    },
+    {
+      "epoch": 1.7864077669902914,
+      "grad_norm": 17.303743362426758,
+      "learning_rate": 1.1533337816991932e-05,
+      "loss": 11.5598,
+      "step": 46
+    },
+    {
+      "epoch": 1.825242718446602,
+      "grad_norm": 24.92502212524414,
+      "learning_rate": 1.0864662381854632e-05,
+      "loss": 15.3898,
+      "step": 47
+    },
+    {
+      "epoch": 1.8640776699029127,
+      "grad_norm": 12.400360107421875,
+      "learning_rate": 1.0384981238178534e-05,
+      "loss": 16.3068,
+      "step": 48
+    },
+    {
+      "epoch": 1.9029126213592233,
+      "grad_norm": 11.315818786621094,
+      "learning_rate": 1.0096348454262845e-05,
+      "loss": 16.1665,
+      "step": 49
+    },
+    {
+      "epoch": 1.941747572815534,
+      "grad_norm": 8.587471961975098,
+      "learning_rate": 1e-05,
+      "loss": 17.7138,
+      "step": 50
+    },
+    {
+      "epoch": 1.941747572815534,
+      "eval_loss": 0.9236457347869873,
+      "eval_runtime": 1.0598,
+      "eval_samples_per_second": 47.178,
+      "eval_steps_per_second": 12.266,
+      "step": 50
     }
   ],
   "logging_steps": 1,
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.483568811343872e+17,
+  "total_flos": 2.967137622687744e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null