eugenepentland commited on
Commit
17d9288
·
1 Parent(s): 1d94385

Training in progress, epoch 3, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e07d299e8556f45759a51cefc80ab60f11b4435a6c3d2830b75c1728ce26e918
3
  size 272138666
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4862ebcdc8d2768d6132069a64ba7dfa94cdbf6455d4d4560f35f71fcc32dfb
3
  size 272138666
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:975291f42c4d710384953d4cfe4672590f726845579c65b66198fecbe47bb8a0
3
  size 136067312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:094d5e98b1a3b1417249f16f63b4282afd3829ec4a31dbc2bc00df331ba5faf1
3
  size 136067312
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac7c3a22fbebe192d497504adeca19930f69610194f9083564e5ad1dcb89f25b
3
  size 21687
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67b5b6dd9206c42165f5188e8effb6be1e40ed13cd768d8b7a1b17575bf03d9e
3
  size 21687
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3855d8fa6386dbbe9784cd0d2ef0d4f3d125bc015c8a434280cc9fb10c02ddfa
3
  size 21687
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6828f225fa32178c33ef119d710f22e24b0bc0c656e9d474379f8495e0908384
3
  size 21687
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2700b6b72f68d78a9a82a0fa0af255ef55f0dfe1f89492b2a06e2afda41947a7
3
  size 21687
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35c188ece0af9a1c1c070d68232ff9e3dc42d760df0f2e5e280f4c2013a3e538
3
  size 21687
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab4f190a20a612f512e2d7da78868785029f26e6db21c761c5b6d57d49b58356
3
  size 21687
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1f9da3ceb24bed58a42dc4e81d0a1e02d0fbd589dd70fb982262e3a3a271213
3
  size 21687
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5cae67bc9a30770c3941dbfe07da9843791ba5bab3656205e5a6dde786b99294
3
  size 21687
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05f3bf8804c8e87d47beaafc30681cc2d8abc53f079c3ec27b8405f27eafb62e
3
  size 21687
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71c4be7227d2681654c2a55861ac5720ec8852f165bcc6ec3bd75902eec825b5
3
  size 21687
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4a3224faf90dcc36243bfcd086fe8dbeb1b17d0f25a5e2b7d1d315effde3250
3
  size 21687
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52192c0bf605713181ffb59b2cd08beebca139789b45c1e5a36398d9729053a0
3
  size 21687
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4feb4ed70cfe7ab739f8d7012e2560e686d994777ba7aa8513abce68d4c42d9a
3
  size 21687
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cf7aa842507d2595cb82707ada514ce753e309be84ecb89281793953e84eab8
3
  size 21687
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1934f6a0f7b05c9571484b9dbea2c6fb5ae573367912d95f0629f9b4ebdf3aa7
3
  size 21687
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:848584cfa92f00c6579cefa73971dbab0353aaf997b31aceeb3fcb519193d7f4
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1d49d9630e070befda78ef6b64c8fdc0bb6b5103c4e1c8f4b6fa80c9bcbdbb4
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.0,
5
  "eval_steps": 25,
6
- "global_step": 412,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -627,13 +627,323 @@
627
  "learning_rate": 3.2871627610744385e-05,
628
  "loss": 0.0255,
629
  "step": 410
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
630
  }
631
  ],
632
  "logging_steps": 5,
633
  "max_steps": 1030,
634
  "num_train_epochs": 5,
635
  "save_steps": 500,
636
- "total_flos": 183700751712256.0,
637
  "trial_name": null,
638
  "trial_params": null
639
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
  "eval_steps": 25,
6
+ "global_step": 618,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
627
  "learning_rate": 3.2871627610744385e-05,
628
  "loss": 0.0255,
629
  "step": 410
630
+ },
631
+ {
632
+ "epoch": 2.01,
633
+ "learning_rate": 3.250884674934208e-05,
634
+ "loss": 0.0377,
635
+ "step": 415
636
+ },
637
+ {
638
+ "epoch": 2.04,
639
+ "learning_rate": 3.214431944087437e-05,
640
+ "loss": 0.0309,
641
+ "step": 420
642
+ },
643
+ {
644
+ "epoch": 2.06,
645
+ "learning_rate": 3.17781304690197e-05,
646
+ "loss": 0.0667,
647
+ "step": 425
648
+ },
649
+ {
650
+ "epoch": 2.06,
651
+ "eval_loss": 0.07217594981193542,
652
+ "eval_runtime": 0.1187,
653
+ "eval_samples_per_second": 3462.503,
654
+ "eval_steps_per_second": 109.52,
655
+ "step": 425
656
+ },
657
+ {
658
+ "epoch": 2.09,
659
+ "learning_rate": 3.14103650039349e-05,
660
+ "loss": 0.0339,
661
+ "step": 430
662
+ },
663
+ {
664
+ "epoch": 2.11,
665
+ "learning_rate": 3.104110858244588e-05,
666
+ "loss": 0.0068,
667
+ "step": 435
668
+ },
669
+ {
670
+ "epoch": 2.14,
671
+ "learning_rate": 3.0670447088153055e-05,
672
+ "loss": 0.0522,
673
+ "step": 440
674
+ },
675
+ {
676
+ "epoch": 2.16,
677
+ "learning_rate": 3.029846673145604e-05,
678
+ "loss": 0.0068,
679
+ "step": 445
680
+ },
681
+ {
682
+ "epoch": 2.18,
683
+ "learning_rate": 2.99252540295024e-05,
684
+ "loss": 0.0514,
685
+ "step": 450
686
+ },
687
+ {
688
+ "epoch": 2.18,
689
+ "eval_loss": 0.05946213752031326,
690
+ "eval_runtime": 0.1357,
691
+ "eval_samples_per_second": 3027.628,
692
+ "eval_steps_per_second": 95.764,
693
+ "step": 450
694
+ },
695
+ {
696
+ "epoch": 2.21,
697
+ "learning_rate": 2.955089578606506e-05,
698
+ "loss": 0.022,
699
+ "step": 455
700
+ },
701
+ {
702
+ "epoch": 2.23,
703
+ "learning_rate": 2.9175479071352996e-05,
704
+ "loss": 0.058,
705
+ "step": 460
706
+ },
707
+ {
708
+ "epoch": 2.26,
709
+ "learning_rate": 2.8799091201760008e-05,
710
+ "loss": 0.0262,
711
+ "step": 465
712
+ },
713
+ {
714
+ "epoch": 2.28,
715
+ "learning_rate": 2.8421819719556205e-05,
716
+ "loss": 0.0108,
717
+ "step": 470
718
+ },
719
+ {
720
+ "epoch": 2.31,
721
+ "learning_rate": 2.804375237252694e-05,
722
+ "loss": 0.0286,
723
+ "step": 475
724
+ },
725
+ {
726
+ "epoch": 2.31,
727
+ "eval_loss": 0.04657759144902229,
728
+ "eval_runtime": 0.1207,
729
+ "eval_samples_per_second": 3405.261,
730
+ "eval_steps_per_second": 107.709,
731
+ "step": 475
732
+ },
733
+ {
734
+ "epoch": 2.33,
735
+ "learning_rate": 2.7664977093564022e-05,
736
+ "loss": 0.0122,
737
+ "step": 480
738
+ },
739
+ {
740
+ "epoch": 2.35,
741
+ "learning_rate": 2.728558198021378e-05,
742
+ "loss": 0.0286,
743
+ "step": 485
744
+ },
745
+ {
746
+ "epoch": 2.38,
747
+ "learning_rate": 2.6905655274186892e-05,
748
+ "loss": 0.0284,
749
+ "step": 490
750
+ },
751
+ {
752
+ "epoch": 2.4,
753
+ "learning_rate": 2.6525285340834644e-05,
754
+ "loss": 0.0223,
755
+ "step": 495
756
+ },
757
+ {
758
+ "epoch": 2.43,
759
+ "learning_rate": 2.6144560648596466e-05,
760
+ "loss": 0.0349,
761
+ "step": 500
762
+ },
763
+ {
764
+ "epoch": 2.43,
765
+ "eval_loss": 0.05097094178199768,
766
+ "eval_runtime": 0.1152,
767
+ "eval_samples_per_second": 3566.29,
768
+ "eval_steps_per_second": 112.802,
769
+ "step": 500
770
+ },
771
+ {
772
+ "epoch": 2.45,
773
+ "learning_rate": 2.576356974842349e-05,
774
+ "loss": 0.031,
775
+ "step": 505
776
+ },
777
+ {
778
+ "epoch": 2.48,
779
+ "learning_rate": 2.538240125318287e-05,
780
+ "loss": 0.0534,
781
+ "step": 510
782
+ },
783
+ {
784
+ "epoch": 2.5,
785
+ "learning_rate": 2.5001143817047762e-05,
786
+ "loss": 0.04,
787
+ "step": 515
788
+ },
789
+ {
790
+ "epoch": 2.52,
791
+ "learning_rate": 2.4619886114877678e-05,
792
+ "loss": 0.0463,
793
+ "step": 520
794
+ },
795
+ {
796
+ "epoch": 2.55,
797
+ "learning_rate": 2.4238716821593986e-05,
798
+ "loss": 0.0231,
799
+ "step": 525
800
+ },
801
+ {
802
+ "epoch": 2.55,
803
+ "eval_loss": 0.04907093197107315,
804
+ "eval_runtime": 0.1367,
805
+ "eval_samples_per_second": 3007.163,
806
+ "eval_steps_per_second": 95.117,
807
+ "step": 525
808
+ },
809
+ {
810
+ "epoch": 2.57,
811
+ "learning_rate": 2.3857724591555443e-05,
812
+ "loss": 0.0292,
813
+ "step": 530
814
+ },
815
+ {
816
+ "epoch": 2.6,
817
+ "learning_rate": 2.347699803793854e-05,
818
+ "loss": 0.0527,
819
+ "step": 535
820
+ },
821
+ {
822
+ "epoch": 2.62,
823
+ "learning_rate": 2.309662571212731e-05,
824
+ "loss": 0.0424,
825
+ "step": 540
826
+ },
827
+ {
828
+ "epoch": 2.65,
829
+ "learning_rate": 2.2716696083117664e-05,
830
+ "loss": 0.0363,
831
+ "step": 545
832
+ },
833
+ {
834
+ "epoch": 2.67,
835
+ "learning_rate": 2.233729751694071e-05,
836
+ "loss": 0.0301,
837
+ "step": 550
838
+ },
839
+ {
840
+ "epoch": 2.67,
841
+ "eval_loss": 0.04202725738286972,
842
+ "eval_runtime": 0.1194,
843
+ "eval_samples_per_second": 3441.042,
844
+ "eval_steps_per_second": 108.841,
845
+ "step": 550
846
+ },
847
+ {
848
+ "epoch": 2.69,
849
+ "learning_rate": 2.1958518256110213e-05,
850
+ "loss": 0.0638,
851
+ "step": 555
852
+ },
853
+ {
854
+ "epoch": 2.72,
855
+ "learning_rate": 2.1580446399098604e-05,
856
+ "loss": 0.0684,
857
+ "step": 560
858
+ },
859
+ {
860
+ "epoch": 2.74,
861
+ "learning_rate": 2.1203169879846667e-05,
862
+ "loss": 0.0249,
863
+ "step": 565
864
+ },
865
+ {
866
+ "epoch": 2.77,
867
+ "learning_rate": 2.0826776447311296e-05,
868
+ "loss": 0.0454,
869
+ "step": 570
870
+ },
871
+ {
872
+ "epoch": 2.79,
873
+ "learning_rate": 2.0451353645056464e-05,
874
+ "loss": 0.0617,
875
+ "step": 575
876
+ },
877
+ {
878
+ "epoch": 2.79,
879
+ "eval_loss": 0.03725917637348175,
880
+ "eval_runtime": 0.1154,
881
+ "eval_samples_per_second": 3562.318,
882
+ "eval_steps_per_second": 112.677,
883
+ "step": 575
884
+ },
885
+ {
886
+ "epoch": 2.82,
887
+ "learning_rate": 2.0076988790891832e-05,
888
+ "loss": 0.0323,
889
+ "step": 580
890
+ },
891
+ {
892
+ "epoch": 2.84,
893
+ "learning_rate": 1.970376895656394e-05,
894
+ "loss": 0.0442,
895
+ "step": 585
896
+ },
897
+ {
898
+ "epoch": 2.86,
899
+ "learning_rate": 1.9331780947504582e-05,
900
+ "loss": 0.0052,
901
+ "step": 590
902
+ },
903
+ {
904
+ "epoch": 2.89,
905
+ "learning_rate": 1.8961111282641173e-05,
906
+ "loss": 0.0171,
907
+ "step": 595
908
+ },
909
+ {
910
+ "epoch": 2.91,
911
+ "learning_rate": 1.859184617427367e-05,
912
+ "loss": 0.0305,
913
+ "step": 600
914
+ },
915
+ {
916
+ "epoch": 2.91,
917
+ "eval_loss": 0.054718561470508575,
918
+ "eval_runtime": 0.144,
919
+ "eval_samples_per_second": 2853.575,
920
+ "eval_steps_per_second": 90.259,
921
+ "step": 600
922
+ },
923
+ {
924
+ "epoch": 2.94,
925
+ "learning_rate": 1.822407150802294e-05,
926
+ "loss": 0.0405,
927
+ "step": 605
928
+ },
929
+ {
930
+ "epoch": 2.96,
931
+ "learning_rate": 1.7857872822854935e-05,
932
+ "loss": 0.0273,
933
+ "step": 610
934
+ },
935
+ {
936
+ "epoch": 2.99,
937
+ "learning_rate": 1.7493335291185675e-05,
938
+ "loss": 0.0324,
939
+ "step": 615
940
  }
941
  ],
942
  "logging_steps": 5,
943
  "max_steps": 1030,
944
  "num_train_epochs": 5,
945
  "save_steps": 500,
946
+ "total_flos": 275559470923776.0,
947
  "trial_name": null,
948
  "trial_params": null
949
  }