Training in progress, step 294, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +109 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dd129a1bd9e3d8ee5a1e5db9d958aed101562fb77fbab566965c550cf4cf930d
 size 100966336

 version https://git-lfs.github.com/spec/v1
+oid sha256:930e0e949e58701597d874e209b7344a4d706b043639fc115daabd8ebbeab0fe
 size 100966336

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8c1d468e59b49e68a6e930bd1f26d86439dccd611e450f66da506396b180023
-size 51613348

 version https://git-lfs.github.com/spec/v1
+oid sha256:709f78d5e19655cbd94a1c21408c442f8f92f8b4fdef1ad9958db57bcf19cf98
+size 51613668

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f261dee394c5079254fc75fbcc32a19145fa5f61c502d9043aac58db31a15775
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:5e926e6553b71955bfbf13bceb3502fb7e3ab2b01b876537fd15965365d03680
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fde5cce913d9c5501edc422466fae8378d9b13dc57f22397ec7fc38f7801fc24
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0316ab07671fffcd24f1d6dedeb5b41d6e5808ab64506af6cae3d569237843af
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.037751395078836,
   "eval_steps": 42,
-  "global_step": 252,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -651,6 +651,112 @@
       "eval_samples_per_second": 35.926,
       "eval_steps_per_second": 4.493,
       "step": 252
     }
   ],
   "logging_steps": 3,
@@ -670,7 +776,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8.058094740937114e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.044043294258642,
   "eval_steps": 42,
+  "global_step": 294,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 35.926,
       "eval_steps_per_second": 4.493,
       "step": 252
+    },
+    {
+      "epoch": 0.038200816448822145,
+      "grad_norm": 0.19113826751708984,
+      "learning_rate": 2.5e-05,
+      "loss": 1.0117,
+      "step": 255
+    },
+    {
+      "epoch": 0.03865023781880828,
+      "grad_norm": 0.17212168872356415,
+      "learning_rate": 2.4519173630307825e-05,
+      "loss": 1.136,
+      "step": 258
+    },
+    {
+      "epoch": 0.039099659188794426,
+      "grad_norm": 0.16413848102092743,
+      "learning_rate": 2.403852513974004e-05,
+      "loss": 1.0806,
+      "step": 261
+    },
+    {
+      "epoch": 0.03954908055878057,
+      "grad_norm": 0.15564818680286407,
+      "learning_rate": 2.3558232341615643e-05,
+      "loss": 1.0233,
+      "step": 264
+    },
+    {
+      "epoch": 0.039998501928766715,
+      "grad_norm": 0.15014681220054626,
+      "learning_rate": 2.3078472917667092e-05,
+      "loss": 1.09,
+      "step": 267
+    },
+    {
+      "epoch": 0.04044792329875285,
+      "grad_norm": 0.1784486323595047,
+      "learning_rate": 2.2599424352307957e-05,
+      "loss": 1.0867,
+      "step": 270
+    },
+    {
+      "epoch": 0.040897344668739,
+      "grad_norm": 0.1629609763622284,
+      "learning_rate": 2.212126386697352e-05,
+      "loss": 1.0916,
+      "step": 273
+    },
+    {
+      "epoch": 0.04134676603872514,
+      "grad_norm": 0.1595894992351532,
+      "learning_rate": 2.164416835455862e-05,
+      "loss": 1.0537,
+      "step": 276
+    },
+    {
+      "epoch": 0.041796187408711286,
+      "grad_norm": 0.16272102296352386,
+      "learning_rate": 2.11683143139771e-05,
+      "loss": 1.1907,
+      "step": 279
+    },
+    {
+      "epoch": 0.04224560877869743,
+      "grad_norm": 0.15896858274936676,
+      "learning_rate": 2.069387778486703e-05,
+      "loss": 1.0492,
+      "step": 282
+    },
+    {
+      "epoch": 0.04269503014868357,
+      "grad_norm": 0.18164744973182678,
+      "learning_rate": 2.02210342824657e-05,
+      "loss": 1.064,
+      "step": 285
+    },
+    {
+      "epoch": 0.04314445151866971,
+      "grad_norm": 0.17921298742294312,
+      "learning_rate": 1.9749958732678767e-05,
+      "loss": 1.1456,
+      "step": 288
+    },
+    {
+      "epoch": 0.043593872888655856,
+      "grad_norm": 0.14521931111812592,
+      "learning_rate": 1.928082540736737e-05,
+      "loss": 1.1073,
+      "step": 291
+    },
+    {
+      "epoch": 0.044043294258642,
+      "grad_norm": 0.15362174808979034,
+      "learning_rate": 1.8813807859877147e-05,
+      "loss": 1.0171,
+      "step": 294
+    },
+    {
+      "epoch": 0.044043294258642,
+      "eval_loss": 1.0923734903335571,
+      "eval_runtime": 312.595,
+      "eval_samples_per_second": 35.967,
+      "eval_steps_per_second": 4.498,
+      "step": 294
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 9.381143263366349e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null