RoyJoy commited on
Commit
cd20563
·
verified ·
1 Parent(s): 992432d

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ea9bbaa9f836e88548307399cd646ed9a9e416b5de868e1aa57e4c6da5e3452
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe87b7f23a8620b30ebfbc17310fdf64b065c3b1a22de0cbe4e08604e02bc988
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9247931af67bd9a86e518ee6d4ef73669be7902d5224f399ccb69ba606d7bc86
3
  size 141053442
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b39c5b7295e0b751ad1c0bbef4163c678467b44bb4c378ae5d258a7679b01a9b
3
  size 141053442
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:599ddcee07d5a861ece0f4de1903724e4aca1c3286a9c2cbd77dcae86ba5fdf1
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6089676789037860dbb4a886aeb4957073983ed4d05ee9f00b3701d6c3be381b
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56e481d3f13ff36459d8279663c80a3f95d42710e9f0e63fccf59b1bd807e90b
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84bbf0db14e2efd4b4c26bd887f7ba654e43e5343f4efe1d325190e985117acc
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a6844d3eabdd14fe6c38335fcbc9f23cfe6cfecd9d4e6657f75d84170558131
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e85ee4c1bb54c798ef97374b8f37491b4dcee21def5302c1e153f1caa94c9a9d
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7454acfb1745ea0de3b382e69cb8a8d7b6de3387a7648d0332213c440912038
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cde5ec8c22d04aa0c3169d2dcd02cb11605370753522b7c73a4f7c104fb9337
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:344fc18df9c84f214bc47e69d1bbde70d66cf3ec6caeac9cd529dfb5814c4b9a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:051dee7dfbeecb34b46e8409ffafec324501f465585234624669bc8c9e863ae4
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 2.4749317169189453,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-75",
4
- "epoch": 1.0530934620447565,
5
  "eval_steps": 25,
6
- "global_step": 75,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -564,6 +564,189 @@
564
  "eval_samples_per_second": 99.064,
565
  "eval_steps_per_second": 25.757,
566
  "step": 75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
567
  }
568
  ],
569
  "logging_steps": 1,
@@ -587,12 +770,12 @@
587
  "should_evaluate": false,
588
  "should_log": false,
589
  "should_save": true,
590
- "should_training_stop": false
591
  },
592
  "attributes": {}
593
  }
594
  },
595
- "total_flos": 4.42951368966144e+16,
596
  "train_batch_size": 1,
597
  "trial_name": null,
598
  "trial_params": null
 
1
  {
2
+ "best_metric": 2.4680612087249756,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 1.4041246160596752,
5
  "eval_steps": 25,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
564
  "eval_samples_per_second": 99.064,
565
  "eval_steps_per_second": 25.757,
566
  "step": 75
567
+ },
568
+ {
569
+ "epoch": 1.0671347082053533,
570
+ "grad_norm": 0.7719098925590515,
571
+ "learning_rate": 2.3444344707738015e-05,
572
+ "loss": 2.3545,
573
+ "step": 76
574
+ },
575
+ {
576
+ "epoch": 1.08117595436595,
577
+ "grad_norm": 0.8498649001121521,
578
+ "learning_rate": 2.2400865784401e-05,
579
+ "loss": 2.5526,
580
+ "step": 77
581
+ },
582
+ {
583
+ "epoch": 1.0952172005265468,
584
+ "grad_norm": 1.025146484375,
585
+ "learning_rate": 2.1393033535713093e-05,
586
+ "loss": 2.88,
587
+ "step": 78
588
+ },
589
+ {
590
+ "epoch": 1.1092584466871436,
591
+ "grad_norm": 1.3467072248458862,
592
+ "learning_rate": 2.0421950011441354e-05,
593
+ "loss": 2.7158,
594
+ "step": 79
595
+ },
596
+ {
597
+ "epoch": 1.1232996928477403,
598
+ "grad_norm": 1.4670014381408691,
599
+ "learning_rate": 1.9488677077162295e-05,
600
+ "loss": 2.8973,
601
+ "step": 80
602
+ },
603
+ {
604
+ "epoch": 1.1373409390083369,
605
+ "grad_norm": 1.7755794525146484,
606
+ "learning_rate": 1.8594235253127375e-05,
607
+ "loss": 2.9892,
608
+ "step": 81
609
+ },
610
+ {
611
+ "epoch": 1.1513821851689336,
612
+ "grad_norm": 2.5536394119262695,
613
+ "learning_rate": 1.77396025983391e-05,
614
+ "loss": 3.3171,
615
+ "step": 82
616
+ },
617
+ {
618
+ "epoch": 1.1654234313295304,
619
+ "grad_norm": 3.319047451019287,
620
+ "learning_rate": 1.6925713641057904e-05,
621
+ "loss": 3.4315,
622
+ "step": 83
623
+ },
624
+ {
625
+ "epoch": 1.1794646774901272,
626
+ "grad_norm": 1.3052607774734497,
627
+ "learning_rate": 1.6153458356909176e-05,
628
+ "loss": 2.2188,
629
+ "step": 84
630
+ },
631
+ {
632
+ "epoch": 1.193505923650724,
633
+ "grad_norm": 0.33176228404045105,
634
+ "learning_rate": 1.5423681195707997e-05,
635
+ "loss": 2.1613,
636
+ "step": 85
637
+ },
638
+ {
639
+ "epoch": 1.2075471698113207,
640
+ "grad_norm": 0.4036065340042114,
641
+ "learning_rate": 1.4737180158065644e-05,
642
+ "loss": 1.8675,
643
+ "step": 86
644
+ },
645
+ {
646
+ "epoch": 1.2215884159719175,
647
+ "grad_norm": 0.5903648734092712,
648
+ "learning_rate": 1.4094705922787687e-05,
649
+ "loss": 1.971,
650
+ "step": 87
651
+ },
652
+ {
653
+ "epoch": 1.2356296621325142,
654
+ "grad_norm": 0.816626250743866,
655
+ "learning_rate": 1.3496961026017687e-05,
656
+ "loss": 2.235,
657
+ "step": 88
658
+ },
659
+ {
660
+ "epoch": 1.249670908293111,
661
+ "grad_norm": 0.8417714834213257,
662
+ "learning_rate": 1.2944599093024267e-05,
663
+ "loss": 2.3109,
664
+ "step": 89
665
+ },
666
+ {
667
+ "epoch": 1.2637121544537078,
668
+ "grad_norm": 0.9843570590019226,
669
+ "learning_rate": 1.2438224123471442e-05,
670
+ "loss": 2.4662,
671
+ "step": 90
672
+ },
673
+ {
674
+ "epoch": 1.2777534006143045,
675
+ "grad_norm": 1.1565916538238525,
676
+ "learning_rate": 1.1978389830953907e-05,
677
+ "loss": 2.7704,
678
+ "step": 91
679
+ },
680
+ {
681
+ "epoch": 1.2917946467749013,
682
+ "grad_norm": 1.2723060846328735,
683
+ "learning_rate": 1.1565599037519316e-05,
684
+ "loss": 2.7589,
685
+ "step": 92
686
+ },
687
+ {
688
+ "epoch": 1.305835892935498,
689
+ "grad_norm": 1.5614339113235474,
690
+ "learning_rate": 1.1200303123839742e-05,
691
+ "loss": 2.9048,
692
+ "step": 93
693
+ },
694
+ {
695
+ "epoch": 1.3198771390960948,
696
+ "grad_norm": 1.889614462852478,
697
+ "learning_rate": 1.088290153563358e-05,
698
+ "loss": 3.2427,
699
+ "step": 94
700
+ },
701
+ {
702
+ "epoch": 1.3339183852566916,
703
+ "grad_norm": 2.9528310298919678,
704
+ "learning_rate": 1.0613741346877497e-05,
705
+ "loss": 3.3262,
706
+ "step": 95
707
+ },
708
+ {
709
+ "epoch": 1.3479596314172884,
710
+ "grad_norm": 6.621575832366943,
711
+ "learning_rate": 1.0393116880286118e-05,
712
+ "loss": 4.0112,
713
+ "step": 96
714
+ },
715
+ {
716
+ "epoch": 1.3620008775778851,
717
+ "grad_norm": 2.7021443843841553,
718
+ "learning_rate": 1.0221269385474488e-05,
719
+ "loss": 2.6302,
720
+ "step": 97
721
+ },
722
+ {
723
+ "epoch": 1.3760421237384817,
724
+ "grad_norm": 0.3131318986415863,
725
+ "learning_rate": 1.0098386775155147e-05,
726
+ "loss": 1.4645,
727
+ "step": 98
728
+ },
729
+ {
730
+ "epoch": 1.3900833698990787,
731
+ "grad_norm": 0.44030845165252686,
732
+ "learning_rate": 1.0024603419658329e-05,
733
+ "loss": 1.8176,
734
+ "step": 99
735
+ },
736
+ {
737
+ "epoch": 1.4041246160596752,
738
+ "grad_norm": 0.6400426626205444,
739
+ "learning_rate": 1e-05,
740
+ "loss": 2.0536,
741
+ "step": 100
742
+ },
743
+ {
744
+ "epoch": 1.4041246160596752,
745
+ "eval_loss": 2.4680612087249756,
746
+ "eval_runtime": 0.5034,
747
+ "eval_samples_per_second": 99.333,
748
+ "eval_steps_per_second": 25.827,
749
+ "step": 100
750
  }
751
  ],
752
  "logging_steps": 1,
 
770
  "should_evaluate": false,
771
  "should_log": false,
772
  "should_save": true,
773
+ "should_training_stop": true
774
  },
775
  "attributes": {}
776
  }
777
  },
778
+ "total_flos": 5.90601825288192e+16,
779
  "train_batch_size": 1,
780
  "trial_name": null,
781
  "trial_params": null