nttx commited on
Commit
aa3aa9d
·
verified ·
1 Parent(s): 47f1250

Training in progress, step 1800, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:189beaf6681e6d167f21a3a98f0dc7706adcbfe55b9863e00513984df19635d9
3
  size 138995824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42bdf7a12daee72631644ff26e7c9f80bbf2ff91189adeb79844f41ec3b3a0de
3
  size 138995824
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9a1746a0d8cbd16bd3e6cbf655cff4dd390682cb0ea8605c3478fee85a7e0e94
3
  size 71078228
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dd720e9bd3c6b76823b903b091b8513f63bb3e34dff5623eb6c72cf14934888
3
  size 71078228
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cf841e914f4af7cd797b78cf530260f06ddfcee84a29b6949d8e836b74c7ebd
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:912d3b1d35846784f3679bfe983446d30830056bb9c64fff6ed2d9239f2efee7
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd47ab20420ef0e1aa00f2819f474325b5901ebc7d45e94590bc3f5c403b9641
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2d1d3e656c776bd4cb00d51e16dbc3c67347908a818c2aee478de24ffb13283
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.838295578956604,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-1600",
4
- "epoch": 1.937046004842615,
5
  "eval_steps": 200,
6
- "global_step": 1600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -303,6 +303,42 @@
303
  "eval_samples_per_second": 64.248,
304
  "eval_steps_per_second": 16.062,
305
  "step": 1600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
  }
307
  ],
308
  "logging_steps": 50,
@@ -331,7 +367,7 @@
331
  "attributes": {}
332
  }
333
  },
334
- "total_flos": 3.06845728210944e+16,
335
  "train_batch_size": 4,
336
  "trial_name": null,
337
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.8344256281852722,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-1800",
4
+ "epoch": 2.179176755447942,
5
  "eval_steps": 200,
6
+ "global_step": 1800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
303
  "eval_samples_per_second": 64.248,
304
  "eval_steps_per_second": 16.062,
305
  "step": 1600
306
+ },
307
+ {
308
+ "epoch": 1.9975786924939467,
309
+ "grad_norm": 0.8213570713996887,
310
+ "learning_rate": 8.141676086873572e-06,
311
+ "loss": 0.691,
312
+ "step": 1650
313
+ },
314
+ {
315
+ "epoch": 2.0581113801452786,
316
+ "grad_norm": 0.8155060410499573,
317
+ "learning_rate": 6.026312439675552e-06,
318
+ "loss": 0.803,
319
+ "step": 1700
320
+ },
321
+ {
322
+ "epoch": 2.1186440677966103,
323
+ "grad_norm": 0.870134711265564,
324
+ "learning_rate": 4.2113336672471245e-06,
325
+ "loss": 0.8079,
326
+ "step": 1750
327
+ },
328
+ {
329
+ "epoch": 2.179176755447942,
330
+ "grad_norm": 0.7964152097702026,
331
+ "learning_rate": 2.7091379149682685e-06,
332
+ "loss": 0.8172,
333
+ "step": 1800
334
+ },
335
+ {
336
+ "epoch": 2.179176755447942,
337
+ "eval_loss": 0.8344256281852722,
338
+ "eval_runtime": 10.8338,
339
+ "eval_samples_per_second": 64.244,
340
+ "eval_steps_per_second": 16.061,
341
+ "step": 1800
342
  }
343
  ],
344
  "logging_steps": 50,
 
367
  "attributes": {}
368
  }
369
  },
370
+ "total_flos": 3.446460309307392e+16,
371
  "train_batch_size": 4,
372
  "trial_name": null,
373
  "trial_params": null