Training in progress, step 64, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +118 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:805896509ff9103abfa0a4a966d5713a4bd1c8741b438690f08f8ff9bcf9a8c5
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:0c35c9188534dff696e2c7c8d14e4c793f0241bf24c9eb49e50ebd0c8b2fcec5
 size 167832240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:504f644ebf18fb20cca271336024c1112033cc0eaa4cc55a1f807f81927c547f
 size 335922386

 version https://git-lfs.github.com/spec/v1
+oid sha256:60522b1def48b724a63c8ba611c3b3877737c88038937468179f77ef563f0056
 size 335922386

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2c011fb688865d21f9702ea14ccc107031312eead0eacc5dd98d0ac2ef6bc99b
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:ec27a7948b1af9e386642f089cbf00d030ac5076d348d5d6cf62fd721edacb89
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a4d54e9fc5ebf69f3767b40c296d70c16d79ad04acb8328b23771380f1b5cd4d
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:a74c2697a5d2f34c7cd0889810082a6eb359f0cfe7581cc22d067d10bb5705e0
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7099c77b3a457f97f7c88b43d981367cdc640db4e9df385fd22eb51a05aec7c
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:c0e0a7bbf100aa101bfb5a43ee6254102010ca2cbe8f455012b88d1102b67cdc
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8a997b593ce85261f4c7fc16a49d26b47625891bf34dd47c65e1d0b386368723
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:0775547a212ad9530d249fd95fad7940c94e5932139984768d41bb9352e5a7f4
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2c4e947a403026a15276d2767326d990009a35d8fce1c5e4312326f271ea16f0
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:59692df4f58e2dc39b05fe970cb9ef53e14c5bd82e536b0d52f878980bedcb4a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.3255813953488373,
   "eval_steps": 6,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -429,6 +429,120 @@
       "learning_rate": 1.5687918106563326e-05,
       "loss": 3.7934,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -443,12 +557,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.483774567120896e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.9767441860465116,
   "eval_steps": 6,
+  "global_step": 64,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.5687918106563326e-05,
       "loss": 3.7934,
       "step": 50
+    },
+    {
+      "epoch": 2.3720930232558137,
+      "grad_norm": 1.7052773237228394,
+      "learning_rate": 1.3631317921347563e-05,
+      "loss": 3.7972,
+      "step": 51
+    },
+    {
+      "epoch": 2.4186046511627906,
+      "grad_norm": 2.181281089782715,
+      "learning_rate": 1.1697777844051105e-05,
+      "loss": 3.8197,
+      "step": 52
+    },
+    {
+      "epoch": 2.4651162790697674,
+      "grad_norm": 2.764360189437866,
+      "learning_rate": 9.893840362247809e-06,
+      "loss": 3.8114,
+      "step": 53
+    },
+    {
+      "epoch": 2.511627906976744,
+      "grad_norm": 2.347971200942993,
+      "learning_rate": 8.225609429353187e-06,
+      "loss": 3.8165,
+      "step": 54
+    },
+    {
+      "epoch": 2.511627906976744,
+      "eval_loss": 3.8157098293304443,
+      "eval_runtime": 1.3802,
+      "eval_samples_per_second": 51.442,
+      "eval_steps_per_second": 2.174,
+      "step": 54
+    },
+    {
+      "epoch": 2.558139534883721,
+      "grad_norm": 2.3315491676330566,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 3.7953,
+      "step": 55
+    },
+    {
+      "epoch": 2.604651162790698,
+      "grad_norm": 2.439504384994507,
+      "learning_rate": 5.318367983829392e-06,
+      "loss": 3.8505,
+      "step": 56
+    },
+    {
+      "epoch": 2.6511627906976747,
+      "grad_norm": 1.8666633367538452,
+      "learning_rate": 4.089194655986306e-06,
+      "loss": 3.8066,
+      "step": 57
+    },
+    {
+      "epoch": 2.697674418604651,
+      "grad_norm": 1.904817819595337,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 3.8182,
+      "step": 58
+    },
+    {
+      "epoch": 2.744186046511628,
+      "grad_norm": 3.3425002098083496,
+      "learning_rate": 2.100524384225555e-06,
+      "loss": 3.7978,
+      "step": 59
+    },
+    {
+      "epoch": 2.7906976744186047,
+      "grad_norm": 1.6687263250350952,
+      "learning_rate": 1.3477564710088098e-06,
+      "loss": 3.8162,
+      "step": 60
+    },
+    {
+      "epoch": 2.7906976744186047,
+      "eval_loss": 3.805279016494751,
+      "eval_runtime": 1.3795,
+      "eval_samples_per_second": 51.467,
+      "eval_steps_per_second": 2.175,
+      "step": 60
+    },
+    {
+      "epoch": 2.8372093023255816,
+      "grad_norm": 2.4629361629486084,
+      "learning_rate": 7.596123493895991e-07,
+      "loss": 3.7868,
+      "step": 61
+    },
+    {
+      "epoch": 2.883720930232558,
+      "grad_norm": 3.152594804763794,
+      "learning_rate": 3.380821129028489e-07,
+      "loss": 3.8117,
+      "step": 62
+    },
+    {
+      "epoch": 2.9302325581395348,
+      "grad_norm": 1.8460975885391235,
+      "learning_rate": 8.459208643659122e-08,
+      "loss": 3.8007,
+      "step": 63
+    },
+    {
+      "epoch": 2.9767441860465116,
+      "grad_norm": 3.2109551429748535,
+      "learning_rate": 0.0,
+      "loss": 3.8177,
+      "step": 64
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.899231445914747e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null