Training in progress, step 26, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +102 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b9bb7687be6b879eed23aa2d8bbc65fb0f30c319fbd3016c8deb56e169a68f79
 size 767856

 version https://git-lfs.github.com/spec/v1
+oid sha256:b23fa9aaf0155071ceaa66c9a8d8329dbc866cd6c8cc1368b7e23c5f53082851
 size 767856

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea1f0da3ba2bf093de64a527e31a701b39a038bb6094f9fb6142ee800040d482
 size 1003002

 version https://git-lfs.github.com/spec/v1
+oid sha256:84d55f67d909c2d5e7c75923363a0582e63bbdad5fc59876aed12febc7f3f2f1
 size 1003002

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c2547eaf80d7f8345c62387653af67ac7fae5a8d41aac312dfcd2a10bad5a9ed
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:efd1f9f945d63f30a37dbb6e9479669c0d43f1dc4f315a3846d3a1bd7da9fa8f
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4395bafb1cf8adc0d9dbfe550d1b98763496e37a8fadb76c1ca1168a4a757f4e
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:37841e69eda911caeb33edeefa0b2f140e72dcce247aeb757b2fe89c00d7887b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.008376963350785341,
   "eval_steps": 13,
-  "global_step": 13,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -114,6 +114,105 @@
       "eval_samples_per_second": 110.914,
       "eval_steps_per_second": 55.457,
       "step": 13
     }
   ],
   "logging_steps": 1,
@@ -133,7 +232,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 28235500879872.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.016753926701570682,
   "eval_steps": 13,
+  "global_step": 26,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 110.914,
       "eval_steps_per_second": 55.457,
       "step": 13
+    },
+    {
+      "epoch": 0.009021345146999597,
+      "grad_norm": 3.3199102878570557,
+      "learning_rate": 9.755282581475769e-05,
+      "loss": 7.7896,
+      "step": 14
+    },
+    {
+      "epoch": 0.009665726943213854,
+      "grad_norm": 3.909156322479248,
+      "learning_rate": 9.619397662556435e-05,
+      "loss": 7.9258,
+      "step": 15
+    },
+    {
+      "epoch": 0.010310108739428111,
+      "grad_norm": 2.004795551300049,
+      "learning_rate": 9.45503262094184e-05,
+      "loss": 7.6277,
+      "step": 16
+    },
+    {
+      "epoch": 0.010954490535642368,
+      "grad_norm": 1.8555487394332886,
+      "learning_rate": 9.263200821770461e-05,
+      "loss": 7.9832,
+      "step": 17
+    },
+    {
+      "epoch": 0.011598872331856625,
+      "grad_norm": 2.1525354385375977,
+      "learning_rate": 9.045084971874738e-05,
+      "loss": 7.4556,
+      "step": 18
+    },
+    {
+      "epoch": 0.012243254128070882,
+      "grad_norm": 2.464871406555176,
+      "learning_rate": 8.802029828000156e-05,
+      "loss": 7.3449,
+      "step": 19
+    },
+    {
+      "epoch": 0.01288763592428514,
+      "grad_norm": 1.7199599742889404,
+      "learning_rate": 8.535533905932738e-05,
+      "loss": 7.4561,
+      "step": 20
+    },
+    {
+      "epoch": 0.013532017720499397,
+      "grad_norm": 1.4880601167678833,
+      "learning_rate": 8.247240241650918e-05,
+      "loss": 7.7051,
+      "step": 21
+    },
+    {
+      "epoch": 0.014176399516713652,
+      "grad_norm": 2.7781991958618164,
+      "learning_rate": 7.938926261462366e-05,
+      "loss": 7.8179,
+      "step": 22
+    },
+    {
+      "epoch": 0.01482078131292791,
+      "grad_norm": 1.7467265129089355,
+      "learning_rate": 7.612492823579745e-05,
+      "loss": 7.6382,
+      "step": 23
+    },
+    {
+      "epoch": 0.015465163109142166,
+      "grad_norm": 2.2026655673980713,
+      "learning_rate": 7.269952498697734e-05,
+      "loss": 7.5733,
+      "step": 24
+    },
+    {
+      "epoch": 0.016109544905356425,
+      "grad_norm": 1.6499930620193481,
+      "learning_rate": 6.91341716182545e-05,
+      "loss": 8.1215,
+      "step": 25
+    },
+    {
+      "epoch": 0.016753926701570682,
+      "grad_norm": 3.187775135040283,
+      "learning_rate": 6.545084971874738e-05,
+      "loss": 7.2734,
+      "step": 26
+    },
+    {
+      "epoch": 0.016753926701570682,
+      "eval_loss": 7.397295951843262,
+      "eval_runtime": 23.7029,
+      "eval_samples_per_second": 110.282,
+      "eval_steps_per_second": 55.141,
+      "step": 26
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 56606749360128.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null