Training in progress, step 150, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +46 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d44bc5be1e3118d2e77ce82df05169f0d9aabeb7f36133abc9476b7e106edf50
 size 1001465824

 version https://git-lfs.github.com/spec/v1
+oid sha256:2d0a8a3ec4f34418a88a656cab64ce468819986166088766eac291b51c015350
 size 1001465824

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9d59946c8391924c55bdbeee729207f19b6103e226220bb310247dcc3cc9d9cb
 size 509176980

 version https://git-lfs.github.com/spec/v1
+oid sha256:5204c920a9c28a65017b7dc19a74b014324204b35ad86defdf6ec3b9bfab5a3b
 size 509176980

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e89a5182bb97955f9b45a8cc3c3bf35765c6a725efad38ec6b58f6ba203942c8
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:026b710e7a787e8533a7b53109192c1ec5bf78162ec26798b66409b9a5f88889
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dac856699362054adec05f5effdce5ef83a8d5422ff5e800f6e464aadfae0e0f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5cb01d4f0da8d959e9ca5a4773566e24c639c6b8f18e1285757275e715c7b05f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.346320346320346,
   "eval_steps": 15,
-  "global_step": 135,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -402,6 +402,49 @@
       "eval_samples_per_second": 4.437,
       "eval_steps_per_second": 0.595,
       "step": 135
     }
   ],
   "logging_steps": 3,
@@ -421,7 +464,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.94240387572695e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.606060606060606,
   "eval_steps": 15,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 4.437,
       "eval_steps_per_second": 0.595,
       "step": 135
+    },
+    {
+      "epoch": 2.398268398268398,
+      "grad_norm": 1.1166489124298096,
+      "learning_rate": 1.0951380325872979e-05,
+      "loss": 1.5943,
+      "step": 138
+    },
+    {
+      "epoch": 2.45021645021645,
+      "grad_norm": 1.1581447124481201,
+      "learning_rate": 9.212000874196953e-06,
+      "loss": 1.7557,
+      "step": 141
+    },
+    {
+      "epoch": 2.502164502164502,
+      "grad_norm": 1.1575648784637451,
+      "learning_rate": 7.60894734597476e-06,
+      "loss": 1.6507,
+      "step": 144
+    },
+    {
+      "epoch": 2.554112554112554,
+      "grad_norm": 1.119139313697815,
+      "learning_rate": 6.147577634637414e-06,
+      "loss": 1.7355,
+      "step": 147
+    },
+    {
+      "epoch": 2.606060606060606,
+      "grad_norm": 1.2103753089904785,
+      "learning_rate": 4.832776083120982e-06,
+      "loss": 1.6898,
+      "step": 150
+    },
+    {
+      "epoch": 2.606060606060606,
+      "eval_loss": 0.6833479404449463,
+      "eval_runtime": 21.8503,
+      "eval_samples_per_second": 4.439,
+      "eval_steps_per_second": 0.595,
+      "step": 150
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 7.70877313473577e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null