Training in progress, step 1500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +153 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5ab8c96d034a3c39633f65fbe492a96d5da8cdfe6fee8067059ab5aafb6a82ce
 size 966995080

 version https://git-lfs.github.com/spec/v1
+oid sha256:09b1576cc961a58ac323dc5821a7971ab4e1659d8ccda4dd695d24372d001011
 size 966995080

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7bd64a777229dbef4bf82dff14d6cb53c591af1b557e7c7b175c65b32ceda7b5
 size 1925064044

 version https://git-lfs.github.com/spec/v1
+oid sha256:d4bf356ca7d0fced26307bc5262ea3dbb16474dacd7db51680f175b28a5cc5de
 size 1925064044

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1e95b55e61f2a72e5e5389523fde8c9fb1a2902741a578a17a342a5d7a4df5d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d9f96556c91f78b167a3a23f1c3f779be5f90901a0a97f9cd4811d2ba7a3f74c
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db3c07fb5d0875fffe56d0b893055e00ce55f7141ff11534660111c35e822163
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:6a4ea870e9c13e7b2891e6dcdeeb66805f2a1e6510caa89ebe81fc5367782860
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 98.0078125,
   "best_model_checkpoint": "./whisper-small-ha-v9/checkpoint-1000",
-  "epoch": 6.369426751592357,
   "eval_steps": 500,
-  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -307,6 +307,156 @@
       "eval_wer": 98.0078125,
       "eval_wer_ortho": 97.75819122437248,
       "step": 1000
     }
   ],
   "logging_steps": 25,
@@ -326,7 +476,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.61044035551232e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 98.0078125,
   "best_model_checkpoint": "./whisper-small-ha-v9/checkpoint-1000",
+  "epoch": 9.554140127388536,
   "eval_steps": 500,
+  "global_step": 1500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_wer": 98.0078125,
       "eval_wer_ortho": 97.75819122437248,
       "step": 1000
+    },
+    {
+      "epoch": 6.528662420382165,
+      "grad_norm": 6.898542404174805,
+      "learning_rate": 0.0005,
+      "loss": 1.2127,
+      "step": 1025
+    },
+    {
+      "epoch": 6.687898089171974,
+      "grad_norm": 6.266862392425537,
+      "learning_rate": 0.0005,
+      "loss": 1.2291,
+      "step": 1050
+    },
+    {
+      "epoch": 6.8471337579617835,
+      "grad_norm": 6.253779888153076,
+      "learning_rate": 0.0005,
+      "loss": 1.2292,
+      "step": 1075
+    },
+    {
+      "epoch": 7.006369426751593,
+      "grad_norm": 5.176928520202637,
+      "learning_rate": 0.0005,
+      "loss": 1.2593,
+      "step": 1100
+    },
+    {
+      "epoch": 7.165605095541402,
+      "grad_norm": 4.826560020446777,
+      "learning_rate": 0.0005,
+      "loss": 1.008,
+      "step": 1125
+    },
+    {
+      "epoch": 7.32484076433121,
+      "grad_norm": 5.738828659057617,
+      "learning_rate": 0.0005,
+      "loss": 1.0366,
+      "step": 1150
+    },
+    {
+      "epoch": 7.484076433121019,
+      "grad_norm": 5.866888523101807,
+      "learning_rate": 0.0005,
+      "loss": 1.1405,
+      "step": 1175
+    },
+    {
+      "epoch": 7.643312101910828,
+      "grad_norm": 6.350291728973389,
+      "learning_rate": 0.0005,
+      "loss": 1.1344,
+      "step": 1200
+    },
+    {
+      "epoch": 7.802547770700637,
+      "grad_norm": 6.462769508361816,
+      "learning_rate": 0.0005,
+      "loss": 1.1296,
+      "step": 1225
+    },
+    {
+      "epoch": 7.961783439490446,
+      "grad_norm": 6.252405166625977,
+      "learning_rate": 0.0005,
+      "loss": 1.1576,
+      "step": 1250
+    },
+    {
+      "epoch": 8.121019108280255,
+      "grad_norm": 6.970676898956299,
+      "learning_rate": 0.0005,
+      "loss": 0.9843,
+      "step": 1275
+    },
+    {
+      "epoch": 8.280254777070065,
+      "grad_norm": 5.744741916656494,
+      "learning_rate": 0.0005,
+      "loss": 0.9896,
+      "step": 1300
+    },
+    {
+      "epoch": 8.439490445859873,
+      "grad_norm": 6.012212753295898,
+      "learning_rate": 0.0005,
+      "loss": 1.0226,
+      "step": 1325
+    },
+    {
+      "epoch": 8.598726114649681,
+      "grad_norm": 5.396963596343994,
+      "learning_rate": 0.0005,
+      "loss": 1.0337,
+      "step": 1350
+    },
+    {
+      "epoch": 8.757961783439491,
+      "grad_norm": 6.041420936584473,
+      "learning_rate": 0.0005,
+      "loss": 1.1026,
+      "step": 1375
+    },
+    {
+      "epoch": 8.9171974522293,
+      "grad_norm": 7.039705276489258,
+      "learning_rate": 0.0005,
+      "loss": 1.0957,
+      "step": 1400
+    },
+    {
+      "epoch": 9.07643312101911,
+      "grad_norm": 5.3199028968811035,
+      "learning_rate": 0.0005,
+      "loss": 1.0184,
+      "step": 1425
+    },
+    {
+      "epoch": 9.235668789808917,
+      "grad_norm": 5.058565616607666,
+      "learning_rate": 0.0005,
+      "loss": 0.9048,
+      "step": 1450
+    },
+    {
+      "epoch": 9.394904458598726,
+      "grad_norm": 5.007670879364014,
+      "learning_rate": 0.0005,
+      "loss": 0.9577,
+      "step": 1475
+    },
+    {
+      "epoch": 9.554140127388536,
+      "grad_norm": 5.517505168914795,
+      "learning_rate": 0.0005,
+      "loss": 0.9893,
+      "step": 1500
+    },
+    {
+      "epoch": 9.554140127388536,
+      "eval_loss": 4.792229652404785,
+      "eval_runtime": 245.7983,
+      "eval_samples_per_second": 2.685,
+      "eval_steps_per_second": 0.171,
+      "eval_wer": 107.6171875,
+      "eval_wer_ortho": 108.7373059973175,
+      "step": 1500
     }
   ],
   "logging_steps": 25,
       "attributes": {}
     }
   },
+  "total_flos": 6.91566053326848e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null