ardaspear committed on
Commit 5dafa23 · verified · 1 Parent(s): 4fd8e28

Training in progress, step 272, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:94712826a7f2c0b7756c1d1754ed196d80b21d0e57551e1ca4f6dcd8cb62a183
+oid sha256:3c7e6d9652442fa642063424b204e43fa7e18c77cf6990911220075103af0998
 size 72396376
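
The three lines above are a Git LFS pointer (spec version, sha256 oid, byte size) rather than the weights themselves; the same pattern applies to the other checkpoint files below. As a minimal, hypothetical sketch (not part of this commit), one way to check that a downloaded file matches the oid and size recorded in such a pointer — `verify_lfs_object` is an illustrative helper name, not an existing API:

```python
import hashlib
import os

def verify_lfs_object(path: str, expected_oid: str, expected_size: int) -> bool:
    """Return True if the file's byte size and sha256 digest match the LFS pointer."""
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        # Hash in 1 MiB chunks to avoid loading the whole file into memory.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Example using the new adapter weights from this commit:
# verify_lfs_object(
#     "last-checkpoint/adapter_model.safetensors",
#     "3c7e6d9652442fa642063424b204e43fa7e18c77cf6990911220075103af0998",
#     72396376,
# )
```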
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c760d568f970c9090c6b12152f53f090933d3e37f77d860d804763cad359c610
-size 37134420
+oid sha256:b739aced26e02b72f0f4376d4b8e542febe656c5c4972b6b6ed9a4685b22ac00
+size 37134740
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:914b1fa34e1d3c8a9975e1e5a238a3740456137dacecfe916285c613ce13f0db
+oid sha256:ecc873a6b8b4dda97e1cfd6b2b58261bd5e2dd33f35d84c167b41efe0afbcd31
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bbcef9424696e41c7961bd91f0570d39d59ef33af28ed19a0eb9e4f50ed1b09a
+oid sha256:3520b6e9bfde48b403dd6f4096e526132e910f4d92bd802fb2e831d46f8ad41f
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.06767131077622975,
+  "epoch": 0.07733864088711971,
   "eval_steps": 34,
-  "global_step": 238,
+  "global_step": 272,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -624,6 +624,91 @@
       "eval_samples_per_second": 35.29,
       "eval_steps_per_second": 4.414,
       "step": 238
+    },
+    {
+      "epoch": 0.06823997725334091,
+      "grad_norm": 0.4818466603755951,
+      "learning_rate": 1.8044563402088684e-05,
+      "loss": 0.5623,
+      "step": 240
+    },
+    {
+      "epoch": 0.06909297696900768,
+      "grad_norm": 0.45440998673439026,
+      "learning_rate": 1.746635141803761e-05,
+      "loss": 0.5626,
+      "step": 243
+    },
+    {
+      "epoch": 0.06994597668467444,
+      "grad_norm": 0.5940297842025757,
+      "learning_rate": 1.6892538872607937e-05,
+      "loss": 0.5264,
+      "step": 246
+    },
+    {
+      "epoch": 0.0707989764003412,
+      "grad_norm": 0.41404712200164795,
+      "learning_rate": 1.6323460856167426e-05,
+      "loss": 0.4484,
+      "step": 249
+    },
+    {
+      "epoch": 0.07165197611600796,
+      "grad_norm": 0.44981393218040466,
+      "learning_rate": 1.5759449694252226e-05,
+      "loss": 0.473,
+      "step": 252
+    },
+    {
+      "epoch": 0.07250497583167473,
+      "grad_norm": 0.6551511883735657,
+      "learning_rate": 1.5200834753498128e-05,
+      "loss": 0.4956,
+      "step": 255
+    },
+    {
+      "epoch": 0.07335797554734148,
+      "grad_norm": 0.5032558441162109,
+      "learning_rate": 1.4647942249299707e-05,
+      "loss": 0.5262,
+      "step": 258
+    },
+    {
+      "epoch": 0.07421097526300825,
+      "grad_norm": 0.5430291891098022,
+      "learning_rate": 1.4101095055309746e-05,
+      "loss": 0.4899,
+      "step": 261
+    },
+    {
+      "epoch": 0.07506397497867501,
+      "grad_norm": 0.5020308494567871,
+      "learning_rate": 1.356061251489012e-05,
+      "loss": 0.494,
+      "step": 264
+    },
+    {
+      "epoch": 0.07591697469434176,
+      "grad_norm": 0.5282646417617798,
+      "learning_rate": 1.302681025462424e-05,
+      "loss": 0.5339,
+      "step": 267
+    },
+    {
+      "epoch": 0.07676997441000853,
+      "grad_norm": 0.48478028178215027,
+      "learning_rate": 1.2500000000000006e-05,
+      "loss": 0.5133,
+      "step": 270
+    },
+    {
+      "epoch": 0.07733864088711971,
+      "eval_loss": 0.4457505941390991,
+      "eval_runtime": 168.037,
+      "eval_samples_per_second": 35.254,
+      "eval_steps_per_second": 4.41,
+      "step": 272
     }
   ],
   "logging_steps": 3,
@@ -643,7 +728,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7.62160041909289e+16,
+  "total_flos": 8.710400478963302e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null