yuweiiizz commited on
Commit
2194917
1 Parent(s): a425f36

Training in progress, step 4000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdc164423f2a3f84efc5dfbeba06f3615eef5f9d37c1db29ef7adf3cb00ef228
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bfea1f8ccff1d3d104539ef9c86c38d6670980c839e6047b65be8f2eae783c8
3
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45427cbf6396a019fcab45f20b6872b19cfd38c2afec6948c7f84225ab41122e
3
  size 1925064044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca089eb0a1d5699b01f559f18d4bede6fbd50e2cda9b1cb1676c3c5548889ceb
3
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9e77c3664e6c2303c974515f610c095940e1b9f1a09380dcd8d25d4c4eb1d05
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7c93a397e9322e49f4ed50d18f810eaf2c39ecdb2985c95d248cd7a2fa2aa47
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9965564127cf0748ae57ecf2b02aba0f15495da8346241873c057a0c14f61d6d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd54311344b834087a4b1c20d06544579c7f43d33908960b6b3b61734dbde46d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 51.336001032657805,
3
- "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-3000",
4
- "epoch": 1.935483870967742,
5
  "eval_steps": 1000,
6
- "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -874,6 +874,295 @@
874
  "eval_samples_per_second": 2.369,
875
  "eval_steps_per_second": 0.297,
876
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
877
  }
878
  ],
879
  "logging_steps": 25,
@@ -881,7 +1170,7 @@
881
  "num_input_tokens_seen": 0,
882
  "num_train_epochs": 3,
883
  "save_steps": 1000,
884
- "total_flos": 1.38506562883584e+19,
885
  "train_batch_size": 16,
886
  "trial_name": null,
887
  "trial_params": null
 
1
  {
2
+ "best_metric": 48.63818252226668,
3
+ "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-4000",
4
+ "epoch": 2.5806451612903225,
5
  "eval_steps": 1000,
6
+ "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
874
  "eval_samples_per_second": 2.369,
875
  "eval_steps_per_second": 0.297,
876
  "step": 3000
877
+ },
878
+ {
879
+ "epoch": 1.9516129032258065,
880
+ "grad_norm": 10.685456275939941,
881
+ "learning_rate": 3.882915173237754e-06,
882
+ "loss": 0.5326,
883
+ "step": 3025
884
+ },
885
+ {
886
+ "epoch": 1.967741935483871,
887
+ "grad_norm": 14.404354095458984,
888
+ "learning_rate": 3.823178016726404e-06,
889
+ "loss": 0.5828,
890
+ "step": 3050
891
+ },
892
+ {
893
+ "epoch": 1.9838709677419355,
894
+ "grad_norm": 13.997696876525879,
895
+ "learning_rate": 3.763440860215054e-06,
896
+ "loss": 0.5394,
897
+ "step": 3075
898
+ },
899
+ {
900
+ "epoch": 2.0,
901
+ "grad_norm": 16.121444702148438,
902
+ "learning_rate": 3.7037037037037037e-06,
903
+ "loss": 0.5635,
904
+ "step": 3100
905
+ },
906
+ {
907
+ "epoch": 2.0161290322580645,
908
+ "grad_norm": 9.237725257873535,
909
+ "learning_rate": 3.643966547192354e-06,
910
+ "loss": 0.3737,
911
+ "step": 3125
912
+ },
913
+ {
914
+ "epoch": 2.032258064516129,
915
+ "grad_norm": 11.313372611999512,
916
+ "learning_rate": 3.584229390681004e-06,
917
+ "loss": 0.3934,
918
+ "step": 3150
919
+ },
920
+ {
921
+ "epoch": 2.0483870967741935,
922
+ "grad_norm": 9.819090843200684,
923
+ "learning_rate": 3.5244922341696534e-06,
924
+ "loss": 0.3494,
925
+ "step": 3175
926
+ },
927
+ {
928
+ "epoch": 2.064516129032258,
929
+ "grad_norm": 9.302324295043945,
930
+ "learning_rate": 3.4647550776583037e-06,
931
+ "loss": 0.3691,
932
+ "step": 3200
933
+ },
934
+ {
935
+ "epoch": 2.0806451612903225,
936
+ "grad_norm": 11.517475128173828,
937
+ "learning_rate": 3.4050179211469536e-06,
938
+ "loss": 0.3652,
939
+ "step": 3225
940
+ },
941
+ {
942
+ "epoch": 2.096774193548387,
943
+ "grad_norm": 7.707530975341797,
944
+ "learning_rate": 3.3452807646356034e-06,
945
+ "loss": 0.3566,
946
+ "step": 3250
947
+ },
948
+ {
949
+ "epoch": 2.1129032258064515,
950
+ "grad_norm": 9.121161460876465,
951
+ "learning_rate": 3.2855436081242537e-06,
952
+ "loss": 0.3409,
953
+ "step": 3275
954
+ },
955
+ {
956
+ "epoch": 2.129032258064516,
957
+ "grad_norm": 10.464853286743164,
958
+ "learning_rate": 3.225806451612903e-06,
959
+ "loss": 0.33,
960
+ "step": 3300
961
+ },
962
+ {
963
+ "epoch": 2.1451612903225805,
964
+ "grad_norm": 8.300515174865723,
965
+ "learning_rate": 3.1660692951015535e-06,
966
+ "loss": 0.3436,
967
+ "step": 3325
968
+ },
969
+ {
970
+ "epoch": 2.161290322580645,
971
+ "grad_norm": 7.577033519744873,
972
+ "learning_rate": 3.1063321385902034e-06,
973
+ "loss": 0.3441,
974
+ "step": 3350
975
+ },
976
+ {
977
+ "epoch": 2.1774193548387095,
978
+ "grad_norm": 12.314337730407715,
979
+ "learning_rate": 3.0465949820788532e-06,
980
+ "loss": 0.387,
981
+ "step": 3375
982
+ },
983
+ {
984
+ "epoch": 2.193548387096774,
985
+ "grad_norm": 8.03864860534668,
986
+ "learning_rate": 2.9868578255675035e-06,
987
+ "loss": 0.3533,
988
+ "step": 3400
989
+ },
990
+ {
991
+ "epoch": 2.2096774193548385,
992
+ "grad_norm": 10.326530456542969,
993
+ "learning_rate": 2.9271206690561534e-06,
994
+ "loss": 0.351,
995
+ "step": 3425
996
+ },
997
+ {
998
+ "epoch": 2.225806451612903,
999
+ "grad_norm": 8.268649101257324,
1000
+ "learning_rate": 2.867383512544803e-06,
1001
+ "loss": 0.3437,
1002
+ "step": 3450
1003
+ },
1004
+ {
1005
+ "epoch": 2.241935483870968,
1006
+ "grad_norm": 9.62258529663086,
1007
+ "learning_rate": 2.807646356033453e-06,
1008
+ "loss": 0.3254,
1009
+ "step": 3475
1010
+ },
1011
+ {
1012
+ "epoch": 2.258064516129032,
1013
+ "grad_norm": 8.58535099029541,
1014
+ "learning_rate": 2.747909199522103e-06,
1015
+ "loss": 0.3592,
1016
+ "step": 3500
1017
+ },
1018
+ {
1019
+ "epoch": 2.274193548387097,
1020
+ "grad_norm": 10.211243629455566,
1021
+ "learning_rate": 2.688172043010753e-06,
1022
+ "loss": 0.3334,
1023
+ "step": 3525
1024
+ },
1025
+ {
1026
+ "epoch": 2.2903225806451615,
1027
+ "grad_norm": 9.174546241760254,
1028
+ "learning_rate": 2.6284348864994032e-06,
1029
+ "loss": 0.3533,
1030
+ "step": 3550
1031
+ },
1032
+ {
1033
+ "epoch": 2.306451612903226,
1034
+ "grad_norm": 9.889862060546875,
1035
+ "learning_rate": 2.5686977299880527e-06,
1036
+ "loss": 0.3263,
1037
+ "step": 3575
1038
+ },
1039
+ {
1040
+ "epoch": 2.3225806451612905,
1041
+ "grad_norm": 10.23873519897461,
1042
+ "learning_rate": 2.5089605734767026e-06,
1043
+ "loss": 0.3601,
1044
+ "step": 3600
1045
+ },
1046
+ {
1047
+ "epoch": 2.338709677419355,
1048
+ "grad_norm": 8.46229076385498,
1049
+ "learning_rate": 2.4492234169653525e-06,
1050
+ "loss": 0.335,
1051
+ "step": 3625
1052
+ },
1053
+ {
1054
+ "epoch": 2.3548387096774195,
1055
+ "grad_norm": 8.364771842956543,
1056
+ "learning_rate": 2.3894862604540028e-06,
1057
+ "loss": 0.3447,
1058
+ "step": 3650
1059
+ },
1060
+ {
1061
+ "epoch": 2.370967741935484,
1062
+ "grad_norm": 11.249506950378418,
1063
+ "learning_rate": 2.3297491039426526e-06,
1064
+ "loss": 0.3544,
1065
+ "step": 3675
1066
+ },
1067
+ {
1068
+ "epoch": 2.3870967741935485,
1069
+ "grad_norm": 8.8016996383667,
1070
+ "learning_rate": 2.2700119474313025e-06,
1071
+ "loss": 0.3277,
1072
+ "step": 3700
1073
+ },
1074
+ {
1075
+ "epoch": 2.403225806451613,
1076
+ "grad_norm": 9.774581909179688,
1077
+ "learning_rate": 2.2102747909199524e-06,
1078
+ "loss": 0.3346,
1079
+ "step": 3725
1080
+ },
1081
+ {
1082
+ "epoch": 2.4193548387096775,
1083
+ "grad_norm": 8.027830123901367,
1084
+ "learning_rate": 2.1505376344086023e-06,
1085
+ "loss": 0.3291,
1086
+ "step": 3750
1087
+ },
1088
+ {
1089
+ "epoch": 2.435483870967742,
1090
+ "grad_norm": 10.107059478759766,
1091
+ "learning_rate": 2.0908004778972526e-06,
1092
+ "loss": 0.3366,
1093
+ "step": 3775
1094
+ },
1095
+ {
1096
+ "epoch": 2.4516129032258065,
1097
+ "grad_norm": 8.280789375305176,
1098
+ "learning_rate": 2.031063321385902e-06,
1099
+ "loss": 0.3473,
1100
+ "step": 3800
1101
+ },
1102
+ {
1103
+ "epoch": 2.467741935483871,
1104
+ "grad_norm": 9.160382270812988,
1105
+ "learning_rate": 1.9713261648745523e-06,
1106
+ "loss": 0.3215,
1107
+ "step": 3825
1108
+ },
1109
+ {
1110
+ "epoch": 2.4838709677419355,
1111
+ "grad_norm": 7.922098636627197,
1112
+ "learning_rate": 1.911589008363202e-06,
1113
+ "loss": 0.3285,
1114
+ "step": 3850
1115
+ },
1116
+ {
1117
+ "epoch": 2.5,
1118
+ "grad_norm": 9.239423751831055,
1119
+ "learning_rate": 1.8518518518518519e-06,
1120
+ "loss": 0.3608,
1121
+ "step": 3875
1122
+ },
1123
+ {
1124
+ "epoch": 2.5161290322580645,
1125
+ "grad_norm": 8.667262077331543,
1126
+ "learning_rate": 1.792114695340502e-06,
1127
+ "loss": 0.3389,
1128
+ "step": 3900
1129
+ },
1130
+ {
1131
+ "epoch": 2.532258064516129,
1132
+ "grad_norm": 10.475480079650879,
1133
+ "learning_rate": 1.7323775388291518e-06,
1134
+ "loss": 0.3226,
1135
+ "step": 3925
1136
+ },
1137
+ {
1138
+ "epoch": 2.5483870967741935,
1139
+ "grad_norm": 11.079362869262695,
1140
+ "learning_rate": 1.6726403823178017e-06,
1141
+ "loss": 0.3559,
1142
+ "step": 3950
1143
+ },
1144
+ {
1145
+ "epoch": 2.564516129032258,
1146
+ "grad_norm": 10.680990219116211,
1147
+ "learning_rate": 1.6129032258064516e-06,
1148
+ "loss": 0.297,
1149
+ "step": 3975
1150
+ },
1151
+ {
1152
+ "epoch": 2.5806451612903225,
1153
+ "grad_norm": 13.101299285888672,
1154
+ "learning_rate": 1.5531660692951017e-06,
1155
+ "loss": 0.3359,
1156
+ "step": 4000
1157
+ },
1158
+ {
1159
+ "epoch": 2.5806451612903225,
1160
+ "eval_cer": 48.63818252226668,
1161
+ "eval_loss": 0.6473900675773621,
1162
+ "eval_runtime": 969.6918,
1163
+ "eval_samples_per_second": 2.352,
1164
+ "eval_steps_per_second": 0.295,
1165
+ "step": 4000
1166
  }
1167
  ],
1168
  "logging_steps": 25,
 
1170
  "num_input_tokens_seen": 0,
1171
  "num_train_epochs": 3,
1172
  "save_steps": 1000,
1173
+ "total_flos": 1.84665797664768e+19,
1174
  "train_batch_size": 16,
1175
  "trial_name": null,
1176
  "trial_params": null