Training in progress, step 1000, checkpoint

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4264aa68792d0923f89598d6599f70f6326b936ae39d78a25bf32ab59e404505
 size 4785762744

 version https://git-lfs.github.com/spec/v1
+oid sha256:497a63d9df7b47b69459aafb78d21a688247f3b7553a7029718a7e7ec01a5ab0
 size 4785762744

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d96e3a0f5cf4aac36877655279f02a3bd52b10294ef17c0d1c492b89226555e8
 size 3497859804

 version https://git-lfs.github.com/spec/v1
+oid sha256:5651e26bed9ff6056393ba02a845493639292773b1802668eb98e3c4332ec90f
 size 3497859804

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ddc12d1b7488be993cbd59f65107bc8a15872051c445c6e5d1dfea163e0cb974
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:2c0eae5d7a16af50a465a7bb79ae72882e8fae900ed86f42a6ea24998e162e19
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.2760143527463428,
   "eval_steps": 500,
-  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -42,6 +42,41 @@
       "learning_rate": 5.518763796909493e-05,
       "loss": 0.5068,
       "step": 500
     }
   ],
   "logging_steps": 100,
@@ -61,7 +96,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.624764282039173e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.5520287054926856,
   "eval_steps": 500,
+  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 5.518763796909493e-05,
       "loss": 0.5068,
       "step": 500
+    },
+    {
+      "epoch": 0.3312172232956114,
+      "grad_norm": 1.1730616092681885,
+      "learning_rate": 6.622516556291392e-05,
+      "loss": 0.4895,
+      "step": 600
+    },
+    {
+      "epoch": 0.3864200938448799,
+      "grad_norm": 1.1149307489395142,
+      "learning_rate": 7.726269315673289e-05,
+      "loss": 0.5044,
+      "step": 700
+    },
+    {
+      "epoch": 0.4416229643941485,
+      "grad_norm": 1.1326206922531128,
+      "learning_rate": 8.830022075055188e-05,
+      "loss": 0.5081,
+      "step": 800
+    },
+    {
+      "epoch": 0.49682583494341703,
+      "grad_norm": 1.1592661142349243,
+      "learning_rate": 9.933774834437086e-05,
+      "loss": 0.5024,
+      "step": 900
+    },
+    {
+      "epoch": 0.5520287054926856,
+      "grad_norm": 1.4300315380096436,
+      "learning_rate": 9.996717238759354e-05,
+      "loss": 0.5078,
+      "step": 1000
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 5.303975346705531e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null