Training in progress, step 5000, checkpoint

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:71e295c3d1f676bfb0d3fd49cdc2cf71014ff519a6577b1e693b3d8da39745cc
 size 4785762744

 version https://git-lfs.github.com/spec/v1
+oid sha256:b9e709f2b6aff15184df51e230bda7cd95f138ebf307fb685e47bd4144fa60c4
 size 4785762744

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4f833405284ac2584aa0571b66b1efbd54f4bf68ae71c4ed62f10f5532985520
 size 3497859804

 version https://git-lfs.github.com/spec/v1
+oid sha256:0219dbbfa399b842520b5177ca95d1af828b294ca5f51a3bda075ca0414a6d70
 size 3497859804

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:51a1858872f1d855aa2e9f6a605bc0f33fefd12a28a30c2dd4668559808db1d0
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:25d5326d5d920a3536b649f61851c1c782c12a52b22b2172b54973e7158a2031
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.4841291747170855,
   "eval_steps": 500,
-  "global_step": 4500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -322,6 +322,41 @@
       "learning_rate": 5.9209209152921384e-05,
       "loss": 0.1541,
       "step": 4500
     }
   ],
   "logging_steps": 100,
@@ -341,7 +376,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.388204789671264e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.760143527463428,
   "eval_steps": 500,
+  "global_step": 5000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 5.9209209152921384e-05,
       "loss": 0.1541,
       "step": 4500
+    },
+    {
+      "epoch": 2.5393320452663537,
+      "grad_norm": 0.9550230503082275,
+      "learning_rate": 5.730821948956665e-05,
+      "loss": 0.1556,
+      "step": 4600
+    },
+    {
+      "epoch": 2.5945349158156223,
+      "grad_norm": 0.6794816255569458,
+      "learning_rate": 5.539636934916247e-05,
+      "loss": 0.1529,
+      "step": 4700
+    },
+    {
+      "epoch": 2.649737786364891,
+      "grad_norm": 0.8268643021583557,
+      "learning_rate": 5.347649986211022e-05,
+      "loss": 0.1538,
+      "step": 4800
+    },
+    {
+      "epoch": 2.7049406569141596,
+      "grad_norm": 0.8783162236213684,
+      "learning_rate": 5.155146407606835e-05,
+      "loss": 0.1527,
+      "step": 4900
+    },
+    {
+      "epoch": 2.760143527463428,
+      "grad_norm": 0.788532555103302,
+      "learning_rate": 4.962412271614282e-05,
+      "loss": 0.146,
+      "step": 5000
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 2.650426442350461e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null