Seosnaps committed on
Commit 44fe5e6
1 Parent(s): a2fae54

Training in progress, step 2500, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4a884e453f4f9292aeefb259505fd0be4d2548b45c218358b5961ea274f55d6e
+ oid sha256:60e2e575c9978e0260ef247845a4c1daf3f51ceb48a7525a63c27ba51b78b0b3
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:517a8d2484fb9208abfae412945a031ba10eed01d1edfcc4e106568bb4c14b87
+ oid sha256:4c1b3b2a0bb7a2f926ac474269f9e991cd89d84f4d190a7292f6bd37860df3a2
  size 1925070764
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4b23f626a7efa36d01f5e36f3f34d543aac465661afc2ed75e47913bc2ba74c7
+ oid sha256:9ad752088b229d78039d00fa98cec499de1d448da781bc7460fcfe8880b39ae1
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f3ea9d13baff2282d300ceb3c3984a3388d1450303ffc8640c73967fa3325903
+ oid sha256:1f0b34b2ac94b6ae1b1e612c27fa9cd4fd3034532b792dc74af68839fa9ffe62
  size 1064
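
The four binary files above are tracked with Git LFS, so the diff only shows their pointer stubs (version, oid sha256, size). As a minimal sketch, assuming a pointer file and the corresponding downloaded blob are both available locally (the paths below are hypothetical and not part of this repo), a stub can be parsed and checked like this:

```python
# Hedged illustration: verify a downloaded blob against a Git LFS pointer stub.
# Only the pointer format (version / oid sha256:<hex> / size <bytes>) comes from
# the diff above; the file paths are made up for the example.
import hashlib

def parse_lfs_pointer(pointer_text: str) -> dict:
    """Split the key/value lines of a Git LFS pointer stub into a dict."""
    fields = {}
    for line in pointer_text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify_blob(pointer_path: str, blob_path: str) -> bool:
    """Return True if the blob's sha256 digest and byte size match the pointer."""
    with open(pointer_path, "r", encoding="utf-8") as f:
        fields = parse_lfs_pointer(f.read())
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])
    digest = hashlib.sha256()
    size = 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size

# Hypothetical usage:
# verify_blob("model.safetensors.pointer", "downloads/model.safetensors")
```
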
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": 80.09197164207703,
  "best_model_checkpoint": "./whisper-small-ha-adam-v5/checkpoint-2000",
- "epoch": 12.738853503184714,
+ "epoch": 15.923566878980893,
  "eval_steps": 500,
- "global_step": 2000,
+ "global_step": 2500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -607,6 +607,156 @@
  "eval_wer": 80.09197164207703,
  "eval_wer_ortho": 81.9921875,
  "step": 2000
+ },
+ {
+ "epoch": 12.898089171974522,
+ "grad_norm": 2.6092240810394287,
+ "learning_rate": 5e-05,
+ "loss": 0.0373,
+ "step": 2025
+ },
+ {
+ "epoch": 13.05732484076433,
+ "grad_norm": 3.123652935028076,
+ "learning_rate": 5e-05,
+ "loss": 0.0293,
+ "step": 2050
+ },
+ {
+ "epoch": 13.21656050955414,
+ "grad_norm": 2.5134246349334717,
+ "learning_rate": 5e-05,
+ "loss": 0.0268,
+ "step": 2075
+ },
+ {
+ "epoch": 13.375796178343949,
+ "grad_norm": 1.4545310735702515,
+ "learning_rate": 5e-05,
+ "loss": 0.0294,
+ "step": 2100
+ },
+ {
+ "epoch": 13.535031847133759,
+ "grad_norm": 2.473706007003784,
+ "learning_rate": 5e-05,
+ "loss": 0.0307,
+ "step": 2125
+ },
+ {
+ "epoch": 13.694267515923567,
+ "grad_norm": 2.8176300525665283,
+ "learning_rate": 5e-05,
+ "loss": 0.0279,
+ "step": 2150
+ },
+ {
+ "epoch": 13.853503184713375,
+ "grad_norm": 38.75226974487305,
+ "learning_rate": 5e-05,
+ "loss": 0.0454,
+ "step": 2175
+ },
+ {
+ "epoch": 14.012738853503185,
+ "grad_norm": 0.736247181892395,
+ "learning_rate": 5e-05,
+ "loss": 0.0247,
+ "step": 2200
+ },
+ {
+ "epoch": 14.171974522292993,
+ "grad_norm": 2.7903378009796143,
+ "learning_rate": 5e-05,
+ "loss": 0.0289,
+ "step": 2225
+ },
+ {
+ "epoch": 14.331210191082803,
+ "grad_norm": 2.184035301208496,
+ "learning_rate": 5e-05,
+ "loss": 0.0216,
+ "step": 2250
+ },
+ {
+ "epoch": 14.490445859872612,
+ "grad_norm": 2.464597702026367,
+ "learning_rate": 5e-05,
+ "loss": 0.0275,
+ "step": 2275
+ },
+ {
+ "epoch": 14.64968152866242,
+ "grad_norm": 4.4987335205078125,
+ "learning_rate": 5e-05,
+ "loss": 0.0374,
+ "step": 2300
+ },
+ {
+ "epoch": 14.80891719745223,
+ "grad_norm": 2.5459258556365967,
+ "learning_rate": 5e-05,
+ "loss": 0.031,
+ "step": 2325
+ },
+ {
+ "epoch": 14.968152866242038,
+ "grad_norm": 2.8609278202056885,
+ "learning_rate": 5e-05,
+ "loss": 0.0338,
+ "step": 2350
+ },
+ {
+ "epoch": 15.127388535031848,
+ "grad_norm": 0.5692533850669861,
+ "learning_rate": 5e-05,
+ "loss": 0.0154,
+ "step": 2375
+ },
+ {
+ "epoch": 15.286624203821656,
+ "grad_norm": 1.785417914390564,
+ "learning_rate": 5e-05,
+ "loss": 0.0285,
+ "step": 2400
+ },
+ {
+ "epoch": 15.445859872611464,
+ "grad_norm": 3.1533737182617188,
+ "learning_rate": 5e-05,
+ "loss": 0.027,
+ "step": 2425
+ },
+ {
+ "epoch": 15.605095541401274,
+ "grad_norm": 0.5182532072067261,
+ "learning_rate": 5e-05,
+ "loss": 0.0293,
+ "step": 2450
+ },
+ {
+ "epoch": 15.764331210191083,
+ "grad_norm": 2.0394535064697266,
+ "learning_rate": 5e-05,
+ "loss": 0.0316,
+ "step": 2475
+ },
+ {
+ "epoch": 15.923566878980893,
+ "grad_norm": 0.6453192234039307,
+ "learning_rate": 5e-05,
+ "loss": 0.0213,
+ "step": 2500
+ },
+ {
+ "epoch": 15.923566878980893,
+ "eval_loss": 2.2915444374084473,
+ "eval_runtime": 239.3393,
+ "eval_samples_per_second": 2.758,
+ "eval_steps_per_second": 0.175,
+ "eval_wer": 80.80091971642076,
+ "eval_wer_ortho": 82.578125,
+ "step": 2500
  }
  ],
  "logging_steps": 25,
@@ -626,7 +776,7 @@
  "attributes": {}
  }
  },
- "total_flos": 9.22088071102464e+18,
+ "total_flos": 1.15261008887808e+19,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null