leixa committed on
Commit 2c1d1e3 · verified · 1 Parent(s): a6a245b

Training in progress, step 294, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4c4d1afd543174e3ab9a2fe34af04ea8af62f4a7862fc78925dd62a536e9feaf
+oid sha256:bcc7c1bb49029978b38479c4be7f1ef61c6a2f919bd26fa335b25ed148c05645
 size 201892112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c67f2182ec15d55237558a16b27d2d849447fdca6258247107fbf31c03b1c68d
-size 102864548
+oid sha256:bc3812fdb507dc890ad3dfeb461b18c142361e92db1a8d19d6b3dbcdb50cb254
+size 102864868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa5a85cd5fb1a43d1a3608369f309632ab765e2fc493019f6bf624d685f85850
+oid sha256:1fd586e160e28252df6d0804518bcfa9955584c079b4b4506e38e98dfc07096c
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ee19ddad9c4c375a1de2d74fb4c1cf5e15d36c1ed47a2cb80f7cb0fbacb3b29e
+oid sha256:318b114b83c26bcba11815378a88e0015bce044b0002c702e0a2627e1a1d1e56
 size 1064
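
The four files above are Git LFS pointer files: each records only the spec version, a sha256 object id, and the byte size of the actual artifact. Below is a minimal sketch, not part of the repository, of how one might check that a locally downloaded checkpoint file matches its pointer; the path, oid, and size are copied from the adapter_model.safetensors change in this commit.

```python
import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Hash the file in 1 MiB chunks so large checkpoints fit in constant memory."""
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Values copied from the new LFS pointer for adapter_model.safetensors above.
expected_oid = "bcc7c1bb49029978b38479c4be7f1ef61c6a2f919bd26fa335b25ed148c05645"
expected_size = 201892112

artifact = Path("last-checkpoint/adapter_model.safetensors")
assert artifact.stat().st_size == expected_size, "size does not match the LFS pointer"
assert sha256_of(artifact) == expected_oid, "sha256 does not match the LFS pointer"
print("adapter_model.safetensors matches its LFS pointer")
```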
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.05282465150403522,
+  "epoch": 0.061628760088041086,
   "eval_steps": 42,
-  "global_step": 252,
+  "global_step": 294,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -651,6 +651,112 @@
       "eval_samples_per_second": 48.7,
       "eval_steps_per_second": 6.091,
       "step": 252
+    },
+    {
+      "epoch": 0.05345351640289278,
+      "grad_norm": 0.36929386854171753,
+      "learning_rate": 5e-05,
+      "loss": 1.0279,
+      "step": 255
+    },
+    {
+      "epoch": 0.05408238130175034,
+      "grad_norm": 0.34936413168907166,
+      "learning_rate": 4.903834726061565e-05,
+      "loss": 1.0564,
+      "step": 258
+    },
+    {
+      "epoch": 0.0547112462006079,
+      "grad_norm": 0.33937400579452515,
+      "learning_rate": 4.807705027948008e-05,
+      "loss": 1.0349,
+      "step": 261
+    },
+    {
+      "epoch": 0.05534011109946547,
+      "grad_norm": 0.3877185583114624,
+      "learning_rate": 4.711646468323129e-05,
+      "loss": 1.0518,
+      "step": 264
+    },
+    {
+      "epoch": 0.055968975998323026,
+      "grad_norm": 0.36920610070228577,
+      "learning_rate": 4.6156945835334184e-05,
+      "loss": 1.0907,
+      "step": 267
+    },
+    {
+      "epoch": 0.05659784089718059,
+      "grad_norm": 0.3572535216808319,
+      "learning_rate": 4.5198848704615914e-05,
+      "loss": 1.0811,
+      "step": 270
+    },
+    {
+      "epoch": 0.05722670579603815,
+      "grad_norm": 0.3565980792045593,
+      "learning_rate": 4.424252773394704e-05,
+      "loss": 1.0598,
+      "step": 273
+    },
+    {
+      "epoch": 0.05785557069489571,
+      "grad_norm": 0.3740125298500061,
+      "learning_rate": 4.328833670911724e-05,
+      "loss": 1.0403,
+      "step": 276
+    },
+    {
+      "epoch": 0.05848443559375328,
+      "grad_norm": 0.382012277841568,
+      "learning_rate": 4.23366286279542e-05,
+      "loss": 1.0877,
+      "step": 279
+    },
+    {
+      "epoch": 0.059113300492610835,
+      "grad_norm": 0.359355092048645,
+      "learning_rate": 4.138775556973406e-05,
+      "loss": 1.0644,
+      "step": 282
+    },
+    {
+      "epoch": 0.0597421653914684,
+      "grad_norm": 0.35905787348747253,
+      "learning_rate": 4.04420685649314e-05,
+      "loss": 1.0783,
+      "step": 285
+    },
+    {
+      "epoch": 0.060371030290325964,
+      "grad_norm": 0.4150475263595581,
+      "learning_rate": 3.9499917465357534e-05,
+      "loss": 1.0996,
+      "step": 288
+    },
+    {
+      "epoch": 0.06099989518918352,
+      "grad_norm": 0.3718733489513397,
+      "learning_rate": 3.856165081473474e-05,
+      "loss": 1.0191,
+      "step": 291
+    },
+    {
+      "epoch": 0.061628760088041086,
+      "grad_norm": 0.36830800771713257,
+      "learning_rate": 3.762761571975429e-05,
+      "loss": 1.1456,
+      "step": 294
+    },
+    {
+      "epoch": 0.061628760088041086,
+      "eval_loss": 1.0759488344192505,
+      "eval_runtime": 164.9861,
+      "eval_samples_per_second": 48.701,
+      "eval_steps_per_second": 6.091,
+      "step": 294
     }
   ],
   "logging_steps": 3,
@@ -670,7 +776,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.375532588230246e+16,
+  "total_flos": 6.271454686268621e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null