leixa committed (verified)
Commit d2fa9e8 · 1 Parent(s): e20d722

Training in progress, step 189, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:376c1539161dc35ee7656886a50f802f6e2f1e85db06a39df8d4512c98347a1e
+oid sha256:2e31caa0549662895406b35f60ee272ab02940a6d1169d24cf30843d63008545
 size 191968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e65aeaef577b1a8e4576abf9c9c86db18a32c69b2007bb249b839a1a38a72370
+oid sha256:1a691ecd73b2e1973b2873ff3d47ecc200e0a85a6854d7fd2dc11400647afb9e
 size 253144
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:71416d694a6814725dec641fd551c41fa66aba026680767844a670043a5a519e
+oid sha256:b4d35c87f5d0e8ea62bc673d149da7d93d79a8b9b3951cb1475638c45526807b
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f5f1659825841d1ed6f30cab4376f396fc018e1826ceec69f151ade48995de07
+oid sha256:cd12c7442104c6bee1178cd38829bd00c01ded478e430b03fe72cfaff700e4be
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.0362537764350455,
+  "epoch": 2.290030211480363,
   "eval_steps": 21,
-  "global_step": 168,
+  "global_step": 189,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -471,6 +471,63 @@
       "eval_samples_per_second": 539.382,
       "eval_steps_per_second": 69.349,
       "step": 168
+    },
+    {
+      "epoch": 2.0725075528700905,
+      "grad_norm": 0.19772112369537354,
+      "learning_rate": 2.4057435257851175e-05,
+      "loss": 10.1846,
+      "step": 171
+    },
+    {
+      "epoch": 2.108761329305136,
+      "grad_norm": 0.29851359128952026,
+      "learning_rate": 2.2392494902427025e-05,
+      "loss": 10.1801,
+      "step": 174
+    },
+    {
+      "epoch": 2.1450151057401814,
+      "grad_norm": 0.21380534768104553,
+      "learning_rate": 2.07704802418419e-05,
+      "loss": 10.1843,
+      "step": 177
+    },
+    {
+      "epoch": 2.1812688821752264,
+      "grad_norm": 0.1674821972846985,
+      "learning_rate": 1.9193913275316626e-05,
+      "loss": 10.1844,
+      "step": 180
+    },
+    {
+      "epoch": 2.217522658610272,
+      "grad_norm": 0.1863589584827423,
+      "learning_rate": 1.7665245337452368e-05,
+      "loss": 10.18,
+      "step": 183
+    },
+    {
+      "epoch": 2.2537764350453173,
+      "grad_norm": 0.22038479149341583,
+      "learning_rate": 1.6186853286758397e-05,
+      "loss": 10.1813,
+      "step": 186
+    },
+    {
+      "epoch": 2.290030211480363,
+      "grad_norm": 0.17609906196594238,
+      "learning_rate": 1.4761035809979395e-05,
+      "loss": 10.1798,
+      "step": 189
+    },
+    {
+      "epoch": 2.290030211480363,
+      "eval_loss": 10.172968864440918,
+      "eval_runtime": 0.2591,
+      "eval_samples_per_second": 540.238,
+      "eval_steps_per_second": 69.459,
+      "step": 189
     }
   ],
   "logging_steps": 3,
@@ -490,7 +547,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 18740510982144.0,
+  "total_flos": 21083074854912.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null