Training in progress, epoch 1, checkpoint

Files changed (10) hide show

last-checkpoint/model-00001-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:08e0005225b3dbaf55dd13ac62926cc7e02c1025d66fa375e6fb305ff79cd4f9
 size 4993448880

 version https://git-lfs.github.com/spec/v1
+oid sha256:af607956bb836bd30abffeab7e595a2ad7bcbbbdeda65c843e8bb6be5393c030
 size 4993448880

last-checkpoint/model-00002-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:630ca774672856d2e0e39a702e590f635a1cfc5726a64b6578ab46dd367369a9
 size 1180663192

 version https://git-lfs.github.com/spec/v1
+oid sha256:68d963fb613c0fdebdca7001b1d4d7ff4cf7136e76fac49dc4c9993cff7b0f1b
 size 1180663192

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:daf6110a01f63dd6631922baba835475bc3d90516bd3fedb2f9181524261c079
-size 4712

 version https://git-lfs.github.com/spec/v1
+oid sha256:87608d67c06604df24518b25c089309e509a99209c77fbf6483710447e67e32c
+size 3137574192

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:26d08ced4d0f6490515a22c9e9401cc7f71de8b6e2c1525e9c8dac221d4b80ab
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:aeb3365cb23fc0ad7ef103f7e7c20c5f3cb4c39bbc3781411623d0ecc15fdec8
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:182d3e4bee7983edebdece2816547628c5ac3e14f5b53eac54c08ad9cbfbfac7
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:b12deca4eebf12493c0f50cc78084223762e2e6f4349141d6e6d1e5efd0ae7b7
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:674c98433de6d8eccea8d9f711e54aa6a237220bb981e22133b7735c14dc835f
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:bfb18fd20d740d864e7a6cec82e5cde8914525ecfd5545ced227f05f64807011
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e6ca2e2b0a88474561faf5e462ebee1de20672630f977a22533f633eb059d6d
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:46f2bb6abf3eb51a791cbe95c0d4cdd70b37ff3bbb853fa053882b049478436f
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d65a8b9e7780be85cfc34df4e5638b03f11c9d0d12c44bc1962fc4e9ec2bba5e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d630d518d61962a1cef0800a6c56a709c5f2ad6682a72036d2bee9b4ec793c47
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,41 +1,39 @@
 {
-  "best_metric": 85.91749644381224,
-  "best_model_checkpoint": "./models/rootflo/fauna-v3.2/checkpoint-1",
-  "epoch": 2.0,
-  "eval_steps": 2,
-  "global_step": 2,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 1.0,
-      "eval_loss": 0.8858556151390076,
-      "eval_runtime": 433.9656,
-      "eval_samples_per_second": 0.012,
-      "eval_steps_per_second": 0.002,
-      "eval_wer": 85.91749644381224,
-      "step": 1
     },
     {
-      "epoch": 2.0,
-      "grad_norm": Infinity,
-      "learning_rate": 0.0,
-      "loss": 0.9645,
-      "step": 2
     },
     {
-      "epoch": 2.0,
-      "eval_loss": 0.8858556151390076,
-      "eval_runtime": 306.8104,
-      "eval_samples_per_second": 0.016,
-      "eval_steps_per_second": 0.003,
-      "eval_wer": 85.91749644381224,
-      "step": 2
     }
   ],
-  "logging_steps": 2,
-  "max_steps": 8,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 8,
   "save_steps": 500,
@@ -46,7 +44,7 @@
         "early_stopping_threshold": 0.01
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -60,7 +58,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.609278806543827e+18,
   "train_batch_size": 96,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 108.55545060029843,
+  "best_model_checkpoint": "./models/rootflo/fauna-v3.2/checkpoint-460",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 460,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.43478260869565216,
+      "grad_norm": 0.5209075808525085,
+      "learning_rate": 1.9919182516351267e-06,
+      "loss": 0.4607,
+      "step": 200
     },
     {
+      "epoch": 0.8695652173913043,
+      "grad_norm": 0.5061023235321045,
+      "learning_rate": 1.955244232925309e-06,
+      "loss": 0.2067,
+      "step": 400
     },
     {
+      "epoch": 1.0,
+      "eval_loss": 0.08908500522375107,
+      "eval_runtime": 28373.2469,
+      "eval_samples_per_second": 1.383,
+      "eval_steps_per_second": 0.004,
+      "eval_wer": 108.55545060029843,
+      "step": 460
     }
   ],
+  "logging_steps": 200,
+  "max_steps": 3680,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 8,
   "save_steps": 500,
         "early_stopping_threshold": 0.01
       },
       "attributes": {
+        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 1.2002681817409278e+21,
   "train_batch_size": 96,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:79ec0733d5afeac0bcf6a6993b58aabd02a4282d270e6ecec9d37441fc1cc21e
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:91ac8723f36b532927f549ae9030c1cf0c332747e6f415dca7f60bb56fa5e0bb
 size 5432