yuweiiizz commited on
Commit
d14bdc2
1 Parent(s): 3066f79

Training in progress, step 3000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb108468618613da410f32d3c1102e0e212e266e64c2e56ee7cf5e84d8cc96c1
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdc164423f2a3f84efc5dfbeba06f3615eef5f9d37c1db29ef7adf3cb00ef228
3
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8ea061beef69a7bdd887c6af179c39c86cc3325feeb1b53c72e45ea18137d3e
3
  size 1925064044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45427cbf6396a019fcab45f20b6872b19cfd38c2afec6948c7f84225ab41122e
3
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d71302206d43b899646f143116f719fb5792d66b03688f9e46b401ca84b3d40f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9e77c3664e6c2303c974515f610c095940e1b9f1a09380dcd8d25d4c4eb1d05
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e83ce7997959ea1cdcd0690cfc2a2d847f6064f6ac117f44e54d89a50a980253
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9965564127cf0748ae57ecf2b02aba0f15495da8346241873c057a0c14f61d6d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 53.38840841616109,
3
- "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-2000",
4
- "epoch": 1.2903225806451613,
5
  "eval_steps": 1000,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -585,6 +585,295 @@
585
  "eval_samples_per_second": 2.398,
586
  "eval_steps_per_second": 0.301,
587
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
588
  }
589
  ],
590
  "logging_steps": 25,
@@ -592,7 +881,7 @@
592
  "num_input_tokens_seen": 0,
593
  "num_train_epochs": 3,
594
  "save_steps": 1000,
595
- "total_flos": 9.2332898832384e+18,
596
  "train_batch_size": 16,
597
  "trial_name": null,
598
  "trial_params": null
 
1
  {
2
+ "best_metric": 51.336001032657805,
3
+ "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-3000",
4
+ "epoch": 1.935483870967742,
5
  "eval_steps": 1000,
6
+ "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
585
  "eval_samples_per_second": 2.398,
586
  "eval_steps_per_second": 0.301,
587
  "step": 2000
588
+ },
589
+ {
590
+ "epoch": 1.3064516129032258,
591
+ "grad_norm": 9.99063777923584,
592
+ "learning_rate": 6.272401433691757e-06,
593
+ "loss": 0.588,
594
+ "step": 2025
595
+ },
596
+ {
597
+ "epoch": 1.3225806451612903,
598
+ "grad_norm": 13.123091697692871,
599
+ "learning_rate": 6.212664277180407e-06,
600
+ "loss": 0.5886,
601
+ "step": 2050
602
+ },
603
+ {
604
+ "epoch": 1.3387096774193548,
605
+ "grad_norm": 10.930394172668457,
606
+ "learning_rate": 6.152927120669057e-06,
607
+ "loss": 0.6117,
608
+ "step": 2075
609
+ },
610
+ {
611
+ "epoch": 1.3548387096774195,
612
+ "grad_norm": 12.531543731689453,
613
+ "learning_rate": 6.0931899641577065e-06,
614
+ "loss": 0.5931,
615
+ "step": 2100
616
+ },
617
+ {
618
+ "epoch": 1.370967741935484,
619
+ "grad_norm": 13.16308307647705,
620
+ "learning_rate": 6.033452807646356e-06,
621
+ "loss": 0.598,
622
+ "step": 2125
623
+ },
624
+ {
625
+ "epoch": 1.3870967741935485,
626
+ "grad_norm": 11.17799186706543,
627
+ "learning_rate": 5.973715651135007e-06,
628
+ "loss": 0.6141,
629
+ "step": 2150
630
+ },
631
+ {
632
+ "epoch": 1.403225806451613,
633
+ "grad_norm": 10.640506744384766,
634
+ "learning_rate": 5.9139784946236566e-06,
635
+ "loss": 0.5682,
636
+ "step": 2175
637
+ },
638
+ {
639
+ "epoch": 1.4193548387096775,
640
+ "grad_norm": 11.789594650268555,
641
+ "learning_rate": 5.854241338112307e-06,
642
+ "loss": 0.5598,
643
+ "step": 2200
644
+ },
645
+ {
646
+ "epoch": 1.435483870967742,
647
+ "grad_norm": 11.937474250793457,
648
+ "learning_rate": 5.794504181600956e-06,
649
+ "loss": 0.6344,
650
+ "step": 2225
651
+ },
652
+ {
653
+ "epoch": 1.4516129032258065,
654
+ "grad_norm": 14.106030464172363,
655
+ "learning_rate": 5.734767025089606e-06,
656
+ "loss": 0.5783,
657
+ "step": 2250
658
+ },
659
+ {
660
+ "epoch": 1.467741935483871,
661
+ "grad_norm": 12.365781784057617,
662
+ "learning_rate": 5.675029868578256e-06,
663
+ "loss": 0.6335,
664
+ "step": 2275
665
+ },
666
+ {
667
+ "epoch": 1.4838709677419355,
668
+ "grad_norm": 14.670917510986328,
669
+ "learning_rate": 5.615292712066906e-06,
670
+ "loss": 0.5988,
671
+ "step": 2300
672
+ },
673
+ {
674
+ "epoch": 1.5,
675
+ "grad_norm": 10.45535659790039,
676
+ "learning_rate": 5.555555555555557e-06,
677
+ "loss": 0.5912,
678
+ "step": 2325
679
+ },
680
+ {
681
+ "epoch": 1.5161290322580645,
682
+ "grad_norm": 15.059216499328613,
683
+ "learning_rate": 5.495818399044206e-06,
684
+ "loss": 0.5405,
685
+ "step": 2350
686
+ },
687
+ {
688
+ "epoch": 1.532258064516129,
689
+ "grad_norm": 12.705628395080566,
690
+ "learning_rate": 5.436081242532856e-06,
691
+ "loss": 0.5816,
692
+ "step": 2375
693
+ },
694
+ {
695
+ "epoch": 1.5483870967741935,
696
+ "grad_norm": 14.382452964782715,
697
+ "learning_rate": 5.376344086021506e-06,
698
+ "loss": 0.5437,
699
+ "step": 2400
700
+ },
701
+ {
702
+ "epoch": 1.564516129032258,
703
+ "grad_norm": 10.80752944946289,
704
+ "learning_rate": 5.316606929510155e-06,
705
+ "loss": 0.5975,
706
+ "step": 2425
707
+ },
708
+ {
709
+ "epoch": 1.5806451612903225,
710
+ "grad_norm": 12.146509170532227,
711
+ "learning_rate": 5.2568697729988065e-06,
712
+ "loss": 0.599,
713
+ "step": 2450
714
+ },
715
+ {
716
+ "epoch": 1.596774193548387,
717
+ "grad_norm": 12.145088195800781,
718
+ "learning_rate": 5.197132616487456e-06,
719
+ "loss": 0.6506,
720
+ "step": 2475
721
+ },
722
+ {
723
+ "epoch": 1.6129032258064515,
724
+ "grad_norm": 13.103174209594727,
725
+ "learning_rate": 5.137395459976105e-06,
726
+ "loss": 0.5649,
727
+ "step": 2500
728
+ },
729
+ {
730
+ "epoch": 1.629032258064516,
731
+ "grad_norm": 13.602423667907715,
732
+ "learning_rate": 5.077658303464756e-06,
733
+ "loss": 0.5424,
734
+ "step": 2525
735
+ },
736
+ {
737
+ "epoch": 1.6451612903225805,
738
+ "grad_norm": 14.787790298461914,
739
+ "learning_rate": 5.017921146953405e-06,
740
+ "loss": 0.5628,
741
+ "step": 2550
742
+ },
743
+ {
744
+ "epoch": 1.661290322580645,
745
+ "grad_norm": 11.559283256530762,
746
+ "learning_rate": 4.9581839904420555e-06,
747
+ "loss": 0.6216,
748
+ "step": 2575
749
+ },
750
+ {
751
+ "epoch": 1.6774193548387095,
752
+ "grad_norm": 13.20376968383789,
753
+ "learning_rate": 4.898446833930705e-06,
754
+ "loss": 0.5694,
755
+ "step": 2600
756
+ },
757
+ {
758
+ "epoch": 1.6935483870967742,
759
+ "grad_norm": 9.632781982421875,
760
+ "learning_rate": 4.838709677419355e-06,
761
+ "loss": 0.5808,
762
+ "step": 2625
763
+ },
764
+ {
765
+ "epoch": 1.7096774193548387,
766
+ "grad_norm": 12.304398536682129,
767
+ "learning_rate": 4.7789725209080055e-06,
768
+ "loss": 0.5777,
769
+ "step": 2650
770
+ },
771
+ {
772
+ "epoch": 1.7258064516129032,
773
+ "grad_norm": 11.025238990783691,
774
+ "learning_rate": 4.719235364396655e-06,
775
+ "loss": 0.5964,
776
+ "step": 2675
777
+ },
778
+ {
779
+ "epoch": 1.7419354838709677,
780
+ "grad_norm": 13.640275955200195,
781
+ "learning_rate": 4.659498207885305e-06,
782
+ "loss": 0.5936,
783
+ "step": 2700
784
+ },
785
+ {
786
+ "epoch": 1.7580645161290323,
787
+ "grad_norm": 14.28750991821289,
788
+ "learning_rate": 4.599761051373955e-06,
789
+ "loss": 0.5814,
790
+ "step": 2725
791
+ },
792
+ {
793
+ "epoch": 1.7741935483870968,
794
+ "grad_norm": 14.228248596191406,
795
+ "learning_rate": 4.540023894862605e-06,
796
+ "loss": 0.5881,
797
+ "step": 2750
798
+ },
799
+ {
800
+ "epoch": 1.7903225806451613,
801
+ "grad_norm": 12.126937866210938,
802
+ "learning_rate": 4.480286738351255e-06,
803
+ "loss": 0.5568,
804
+ "step": 2775
805
+ },
806
+ {
807
+ "epoch": 1.8064516129032258,
808
+ "grad_norm": 12.653525352478027,
809
+ "learning_rate": 4.420549581839905e-06,
810
+ "loss": 0.5988,
811
+ "step": 2800
812
+ },
813
+ {
814
+ "epoch": 1.8225806451612905,
815
+ "grad_norm": 10.851930618286133,
816
+ "learning_rate": 4.360812425328555e-06,
817
+ "loss": 0.6073,
818
+ "step": 2825
819
+ },
820
+ {
821
+ "epoch": 1.838709677419355,
822
+ "grad_norm": 12.00724983215332,
823
+ "learning_rate": 4.3010752688172045e-06,
824
+ "loss": 0.5739,
825
+ "step": 2850
826
+ },
827
+ {
828
+ "epoch": 1.8548387096774195,
829
+ "grad_norm": 10.997614860534668,
830
+ "learning_rate": 4.241338112305855e-06,
831
+ "loss": 0.5663,
832
+ "step": 2875
833
+ },
834
+ {
835
+ "epoch": 1.870967741935484,
836
+ "grad_norm": 12.384391784667969,
837
+ "learning_rate": 4.181600955794505e-06,
838
+ "loss": 0.5325,
839
+ "step": 2900
840
+ },
841
+ {
842
+ "epoch": 1.8870967741935485,
843
+ "grad_norm": 10.200772285461426,
844
+ "learning_rate": 4.121863799283155e-06,
845
+ "loss": 0.5918,
846
+ "step": 2925
847
+ },
848
+ {
849
+ "epoch": 1.903225806451613,
850
+ "grad_norm": 13.224651336669922,
851
+ "learning_rate": 4.062126642771804e-06,
852
+ "loss": 0.5399,
853
+ "step": 2950
854
+ },
855
+ {
856
+ "epoch": 1.9193548387096775,
857
+ "grad_norm": 10.611023902893066,
858
+ "learning_rate": 4.002389486260454e-06,
859
+ "loss": 0.5593,
860
+ "step": 2975
861
+ },
862
+ {
863
+ "epoch": 1.935483870967742,
864
+ "grad_norm": 10.110644340515137,
865
+ "learning_rate": 3.942652329749105e-06,
866
+ "loss": 0.5611,
867
+ "step": 3000
868
+ },
869
+ {
870
+ "epoch": 1.935483870967742,
871
+ "eval_cer": 51.336001032657805,
872
+ "eval_loss": 0.6702780723571777,
873
+ "eval_runtime": 963.0475,
874
+ "eval_samples_per_second": 2.369,
875
+ "eval_steps_per_second": 0.297,
876
+ "step": 3000
877
  }
878
  ],
879
  "logging_steps": 25,
 
881
  "num_input_tokens_seen": 0,
882
  "num_train_epochs": 3,
883
  "save_steps": 1000,
884
+ "total_flos": 1.38506562883584e+19,
885
  "train_batch_size": 16,
886
  "trial_name": null,
887
  "trial_params": null