Training in progress, step 4000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +294 -5

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d0803610ba65b84aa922737b5b285d0c80e484a3a23c1f04437d154c379e791
 size 966995080

 version https://git-lfs.github.com/spec/v1
+oid sha256:b810b7f0fe83995300521a646137d370d36d549e50472fcf6c1da2a177919ec3
 size 966995080

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:52421390aa8c070825bd1704b600e410fc95c7928f2caaaa853f621811e988d0
 size 1925064044

 version https://git-lfs.github.com/spec/v1
+oid sha256:3a70bf563a60fc19871fd719056a40461333af72260151f6bade95b428cbc93c
 size 1925064044

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9e77c3664e6c2303c974515f610c095940e1b9f1a09380dcd8d25d4c4eb1d05
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:a7c93a397e9322e49f4ed50d18f810eaf2c39ecdb2985c95d248cd7a2fa2aa47
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9965564127cf0748ae57ecf2b02aba0f15495da8346241873c057a0c14f61d6d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fd54311344b834087a4b1c20d06544579c7f43d33908960b6b3b61734dbde46d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 31.230569948186528,
-  "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-3000",
-  "epoch": 1.935483870967742,
   "eval_steps": 1000,
-  "global_step": 3000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -874,6 +874,295 @@
       "eval_samples_per_second": 2.385,
       "eval_steps_per_second": 0.299,
       "step": 3000
     }
   ],
   "logging_steps": 25,
@@ -881,7 +1170,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
   "save_steps": 1000,
-  "total_flos": 1.38506562883584e+19,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 29.080310880829014,
+  "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-4000",
+  "epoch": 2.5806451612903225,
   "eval_steps": 1000,
+  "global_step": 4000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 2.385,
       "eval_steps_per_second": 0.299,
       "step": 3000
+    },
+    {
+      "epoch": 1.9516129032258065,
+      "grad_norm": 9.115569114685059,
+      "learning_rate": 3.882915173237754e-06,
+      "loss": 0.2929,
+      "step": 3025
+    },
+    {
+      "epoch": 1.967741935483871,
+      "grad_norm": 10.029779434204102,
+      "learning_rate": 3.823178016726404e-06,
+      "loss": 0.2939,
+      "step": 3050
+    },
+    {
+      "epoch": 1.9838709677419355,
+      "grad_norm": 10.219369888305664,
+      "learning_rate": 3.763440860215054e-06,
+      "loss": 0.3228,
+      "step": 3075
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 10.639910697937012,
+      "learning_rate": 3.7037037037037037e-06,
+      "loss": 0.3236,
+      "step": 3100
+    },
+    {
+      "epoch": 2.0161290322580645,
+      "grad_norm": 4.525390148162842,
+      "learning_rate": 3.643966547192354e-06,
+      "loss": 0.1713,
+      "step": 3125
+    },
+    {
+      "epoch": 2.032258064516129,
+      "grad_norm": 6.762115478515625,
+      "learning_rate": 3.584229390681004e-06,
+      "loss": 0.1892,
+      "step": 3150
+    },
+    {
+      "epoch": 2.0483870967741935,
+      "grad_norm": 8.670353889465332,
+      "learning_rate": 3.5244922341696534e-06,
+      "loss": 0.1902,
+      "step": 3175
+    },
+    {
+      "epoch": 2.064516129032258,
+      "grad_norm": 6.066471576690674,
+      "learning_rate": 3.4647550776583037e-06,
+      "loss": 0.157,
+      "step": 3200
+    },
+    {
+      "epoch": 2.0806451612903225,
+      "grad_norm": 5.877708911895752,
+      "learning_rate": 3.4050179211469536e-06,
+      "loss": 0.18,
+      "step": 3225
+    },
+    {
+      "epoch": 2.096774193548387,
+      "grad_norm": 5.906160354614258,
+      "learning_rate": 3.3452807646356034e-06,
+      "loss": 0.198,
+      "step": 3250
+    },
+    {
+      "epoch": 2.1129032258064515,
+      "grad_norm": 7.664149761199951,
+      "learning_rate": 3.2855436081242537e-06,
+      "loss": 0.1889,
+      "step": 3275
+    },
+    {
+      "epoch": 2.129032258064516,
+      "grad_norm": 6.261497497558594,
+      "learning_rate": 3.225806451612903e-06,
+      "loss": 0.157,
+      "step": 3300
+    },
+    {
+      "epoch": 2.1451612903225805,
+      "grad_norm": 6.205556869506836,
+      "learning_rate": 3.1660692951015535e-06,
+      "loss": 0.1635,
+      "step": 3325
+    },
+    {
+      "epoch": 2.161290322580645,
+      "grad_norm": 5.549154758453369,
+      "learning_rate": 3.1063321385902034e-06,
+      "loss": 0.1799,
+      "step": 3350
+    },
+    {
+      "epoch": 2.1774193548387095,
+      "grad_norm": 7.939329624176025,
+      "learning_rate": 3.0465949820788532e-06,
+      "loss": 0.159,
+      "step": 3375
+    },
+    {
+      "epoch": 2.193548387096774,
+      "grad_norm": 7.168279647827148,
+      "learning_rate": 2.9868578255675035e-06,
+      "loss": 0.1772,
+      "step": 3400
+    },
+    {
+      "epoch": 2.2096774193548385,
+      "grad_norm": 7.339049816131592,
+      "learning_rate": 2.9271206690561534e-06,
+      "loss": 0.161,
+      "step": 3425
+    },
+    {
+      "epoch": 2.225806451612903,
+      "grad_norm": 7.939656734466553,
+      "learning_rate": 2.867383512544803e-06,
+      "loss": 0.163,
+      "step": 3450
+    },
+    {
+      "epoch": 2.241935483870968,
+      "grad_norm": 6.338183403015137,
+      "learning_rate": 2.807646356033453e-06,
+      "loss": 0.1671,
+      "step": 3475
+    },
+    {
+      "epoch": 2.258064516129032,
+      "grad_norm": 4.27256441116333,
+      "learning_rate": 2.747909199522103e-06,
+      "loss": 0.1696,
+      "step": 3500
+    },
+    {
+      "epoch": 2.274193548387097,
+      "grad_norm": 7.593271255493164,
+      "learning_rate": 2.688172043010753e-06,
+      "loss": 0.1558,
+      "step": 3525
+    },
+    {
+      "epoch": 2.2903225806451615,
+      "grad_norm": 3.984323501586914,
+      "learning_rate": 2.6284348864994032e-06,
+      "loss": 0.1727,
+      "step": 3550
+    },
+    {
+      "epoch": 2.306451612903226,
+      "grad_norm": 6.01830530166626,
+      "learning_rate": 2.5686977299880527e-06,
+      "loss": 0.1681,
+      "step": 3575
+    },
+    {
+      "epoch": 2.3225806451612905,
+      "grad_norm": 8.704211235046387,
+      "learning_rate": 2.5089605734767026e-06,
+      "loss": 0.1703,
+      "step": 3600
+    },
+    {
+      "epoch": 2.338709677419355,
+      "grad_norm": 7.5924506187438965,
+      "learning_rate": 2.4492234169653525e-06,
+      "loss": 0.1723,
+      "step": 3625
+    },
+    {
+      "epoch": 2.3548387096774195,
+      "grad_norm": 5.732957363128662,
+      "learning_rate": 2.3894862604540028e-06,
+      "loss": 0.1586,
+      "step": 3650
+    },
+    {
+      "epoch": 2.370967741935484,
+      "grad_norm": 7.640561580657959,
+      "learning_rate": 2.3297491039426526e-06,
+      "loss": 0.1781,
+      "step": 3675
+    },
+    {
+      "epoch": 2.3870967741935485,
+      "grad_norm": 7.3015313148498535,
+      "learning_rate": 2.2700119474313025e-06,
+      "loss": 0.1571,
+      "step": 3700
+    },
+    {
+      "epoch": 2.403225806451613,
+      "grad_norm": 7.6669440269470215,
+      "learning_rate": 2.2102747909199524e-06,
+      "loss": 0.1532,
+      "step": 3725
+    },
+    {
+      "epoch": 2.4193548387096775,
+      "grad_norm": 4.207671165466309,
+      "learning_rate": 2.1505376344086023e-06,
+      "loss": 0.1658,
+      "step": 3750
+    },
+    {
+      "epoch": 2.435483870967742,
+      "grad_norm": 6.316219806671143,
+      "learning_rate": 2.0908004778972526e-06,
+      "loss": 0.1681,
+      "step": 3775
+    },
+    {
+      "epoch": 2.4516129032258065,
+      "grad_norm": 6.380753040313721,
+      "learning_rate": 2.031063321385902e-06,
+      "loss": 0.1595,
+      "step": 3800
+    },
+    {
+      "epoch": 2.467741935483871,
+      "grad_norm": 7.425994873046875,
+      "learning_rate": 1.9713261648745523e-06,
+      "loss": 0.1457,
+      "step": 3825
+    },
+    {
+      "epoch": 2.4838709677419355,
+      "grad_norm": 7.0207600593566895,
+      "learning_rate": 1.911589008363202e-06,
+      "loss": 0.1557,
+      "step": 3850
+    },
+    {
+      "epoch": 2.5,
+      "grad_norm": 7.421679496765137,
+      "learning_rate": 1.8518518518518519e-06,
+      "loss": 0.17,
+      "step": 3875
+    },
+    {
+      "epoch": 2.5161290322580645,
+      "grad_norm": 7.43884801864624,
+      "learning_rate": 1.792114695340502e-06,
+      "loss": 0.1605,
+      "step": 3900
+    },
+    {
+      "epoch": 2.532258064516129,
+      "grad_norm": 6.753660202026367,
+      "learning_rate": 1.7323775388291518e-06,
+      "loss": 0.1467,
+      "step": 3925
+    },
+    {
+      "epoch": 2.5483870967741935,
+      "grad_norm": 5.872158050537109,
+      "learning_rate": 1.6726403823178017e-06,
+      "loss": 0.1511,
+      "step": 3950
+    },
+    {
+      "epoch": 2.564516129032258,
+      "grad_norm": 5.571343421936035,
+      "learning_rate": 1.6129032258064516e-06,
+      "loss": 0.1476,
+      "step": 3975
+    },
+    {
+      "epoch": 2.5806451612903225,
+      "grad_norm": 9.308771133422852,
+      "learning_rate": 1.5531660692951017e-06,
+      "loss": 0.1566,
+      "step": 4000
+    },
+    {
+      "epoch": 2.5806451612903225,
+      "eval_cer": 29.080310880829014,
+      "eval_loss": 0.37441545724868774,
+      "eval_runtime": 959.8469,
+      "eval_samples_per_second": 2.376,
+      "eval_steps_per_second": 0.298,
+      "step": 4000
     }
   ],
   "logging_steps": 25,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
   "save_steps": 1000,
+  "total_flos": 1.84665797664768e+19,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null