Training in progress, step 168, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +109 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:30d12559217c7f23d41501f47dca546b26251a84fd89ee55f980fc13aa93f12b
 size 100966336

 version https://git-lfs.github.com/spec/v1
+oid sha256:2ffd098374c4829deef4e86bb45b6d99e299ec4d785b2818c11701cc0184f9b4
 size 100966336

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b2fa88fe8784ca0c128965c9ad8a5a8e29ef38b586f101c90b586476e3ed4558
 size 51613348

 version https://git-lfs.github.com/spec/v1
+oid sha256:d430efc5b96ee5cc101a4108edaa9ac66a363cbaf91802f71917f34d4f30221f
 size 51613348

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac024c06a7fafa2800668c036cb57c23ab71073198d0a72d5ca171de9c65f87b
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:9e477b2f97b5d76e20fb3241eba6754e5c05c0a9ad01e9b25e3c321898b520d6
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5d6789ad2e7141c857db1a1b89c580a274f669224bfc6f48fb1745262d15c182
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:79783dc253db8561cbce9475c59c8adb4831f213eda27032651f04af31d6a595
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.018875697539418,
   "eval_steps": 42,
-  "global_step": 126,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -333,6 +333,112 @@
       "eval_samples_per_second": 35.97,
       "eval_steps_per_second": 4.498,
       "step": 126
     }
   ],
   "logging_steps": 3,
@@ -352,7 +458,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.03686064914432e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.025167596719224,
   "eval_steps": 42,
+  "global_step": 168,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 35.97,
       "eval_steps_per_second": 4.498,
       "step": 126
+    },
+    {
+      "epoch": 0.01932511890940414,
+      "grad_norm": 0.13838274776935577,
+      "learning_rate": 4.306987159568479e-05,
+      "loss": 0.9996,
+      "step": 129
+    },
+    {
+      "epoch": 0.019774540279390285,
+      "grad_norm": 0.14386090636253357,
+      "learning_rate": 4.273424802513145e-05,
+      "loss": 0.9947,
+      "step": 132
+    },
+    {
+      "epoch": 0.020223961649376426,
+      "grad_norm": 0.14473505318164825,
+      "learning_rate": 4.239206376508717e-05,
+      "loss": 1.1779,
+      "step": 135
+    },
+    {
+      "epoch": 0.02067338301936257,
+      "grad_norm": 0.15975573658943176,
+      "learning_rate": 4.204344540477499e-05,
+      "loss": 1.2125,
+      "step": 138
+    },
+    {
+      "epoch": 0.021122804389348715,
+      "grad_norm": 0.12576735019683838,
+      "learning_rate": 4.16885219136787e-05,
+      "loss": 1.1312,
+      "step": 141
+    },
+    {
+      "epoch": 0.021572225759334856,
+      "grad_norm": 0.16916967928409576,
+      "learning_rate": 4.132742459383122e-05,
+      "loss": 1.0823,
+      "step": 144
+    },
+    {
+      "epoch": 0.022021647129321,
+      "grad_norm": 0.17954471707344055,
+      "learning_rate": 4.096028703124014e-05,
+      "loss": 1.0728,
+      "step": 147
+    },
+    {
+      "epoch": 0.02247106849930714,
+      "grad_norm": 0.11972087621688843,
+      "learning_rate": 4.058724504646834e-05,
+      "loss": 1.0886,
+      "step": 150
+    },
+    {
+      "epoch": 0.022920489869293285,
+      "grad_norm": 0.14640676975250244,
+      "learning_rate": 4.0208436644387834e-05,
+      "loss": 1.0472,
+      "step": 153
+    },
+    {
+      "epoch": 0.023369911239279426,
+      "grad_norm": 0.13775284588336945,
+      "learning_rate": 3.982400196312564e-05,
+      "loss": 1.0984,
+      "step": 156
+    },
+    {
+      "epoch": 0.02381933260926557,
+      "grad_norm": 0.14184071123600006,
+      "learning_rate": 3.943408322222049e-05,
+      "loss": 1.1431,
+      "step": 159
+    },
+    {
+      "epoch": 0.024268753979251715,
+      "grad_norm": 0.1685701608657837,
+      "learning_rate": 3.903882467000937e-05,
+      "loss": 1.1531,
+      "step": 162
+    },
+    {
+      "epoch": 0.024718175349237856,
+      "grad_norm": 0.1468999981880188,
+      "learning_rate": 3.8638372530263715e-05,
+      "loss": 1.1069,
+      "step": 165
+    },
+    {
+      "epoch": 0.025167596719224,
+      "grad_norm": 0.13278649747371674,
+      "learning_rate": 3.823287494809469e-05,
+      "loss": 1.1124,
+      "step": 168
+    },
+    {
+      "epoch": 0.025167596719224,
+      "eval_loss": 1.1012423038482666,
+      "eval_runtime": 312.1008,
+      "eval_samples_per_second": 36.024,
+      "eval_steps_per_second": 4.505,
+      "step": 168
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 5.365118024024064e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null