Training in progress, step 3000, checkpoint

Files changed (5) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8a9b491fbd7bc6c0109b471a937c096083f66e1dcb01623a358dcd9dad343ee
 size 57029756

 version https://git-lfs.github.com/spec/v1
+oid sha256:6ddbc523b79aaf9756951afaaf7b9b1bd6c6e25231ccc9851e69b239171dd53c
 size 57029756

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:93df9e1a335779e8fcd800bfa6480ffd1974b4849cc22aaf85f32d159b020fae
 size 114100410

 version https://git-lfs.github.com/spec/v1
+oid sha256:cc580a2c08cdc5e7ca83543b3c323f54249a857933da2e4c71cd3c5bd3da1786
 size 114100410

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f0eb88244438f4fb27b9e03d80e6d2edb2df5c79a85aa77ddbf31851cc029627
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:af9b7e49efa8bec2063dadfdfeab7409886026929b750eabf49749636c16bcc5
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bc802da9f5f8a0a2490757e0f5753d6ca70d9156e9584db643bb0238ebb258c9
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:8a1309f61c0d311eaceb7c90e770f4b3fcc4eb1a4147f8ffeac21539facfeab8
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.277904328018223,
   "eval_steps": 500,
-  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -59,6 +59,32 @@
       "learning_rate": 1.0888382687927108e-05,
       "loss": 0.1682,
       "step": 2000
     }
   ],
   "logging_steps": 500,
@@ -78,7 +104,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 41707115119026.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 3.416856492027335,
   "eval_steps": 500,
+  "global_step": 3000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.0888382687927108e-05,
       "loss": 0.1682,
       "step": 2000
+    },
+    {
+      "epoch": 2.847380410022779,
+      "grad_norm": 4.558180332183838,
+      "learning_rate": 8.610478359908885e-06,
+      "loss": 0.1444,
+      "step": 2500
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.9651611673312469,
+      "eval_f1": 0.8525989138867338,
+      "eval_loss": 0.13545359671115875,
+      "eval_precision": 0.8447348193697156,
+      "eval_recall": 0.860610806577917,
+      "eval_runtime": 2.9602,
+      "eval_samples_per_second": 1097.888,
+      "eval_steps_per_second": 68.914,
+      "step": 2634
+    },
+    {
+      "epoch": 3.416856492027335,
+      "grad_norm": 2.630763292312622,
+      "learning_rate": 6.3325740318906616e-06,
+      "loss": 0.1302,
+      "step": 3000
     }
   ],
   "logging_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 62399928651750.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null