Training in progress, step 724, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +53 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c9515a7002cfa7f67e6c746a7e59b868de77da444db2b95a7c9cb039f0922f08
 size 2145944

 version https://git-lfs.github.com/spec/v1
+oid sha256:227f14b5a2687735006c73ead50afcc55eac9584005c145efe00d779c806789b
 size 2145944

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7971e0a79864672f8071d25876b2985e088af6d6ac063d1237778d2eaf4fd19b
 size 4310020

 version https://git-lfs.github.com/spec/v1
+oid sha256:d1a76c406a7be9b2b100b768746eaa7355f01cf42dc4eefe6eabcce6f8e87594
 size 4310020

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1b753a7aacbc56b237a570162811e1a71ee10b02e1bee93e6daab70cd4680802
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:a35e5c3617dd4cf3452d6d89bc6fd0ebe6a2b8784de42225a2fdf51d60521b82
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c1fb5420c0f25e5f6b2392bd3436352035eb0debaec1745bf48bdc44f8008fc2
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:dd30e2f855940fa3e33d3978da3fd180418222584fe4f4d19f30d42f33e20efd
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e2c82e31e1ac17c78c3d52136bae19d297a1c91d03f90b220e02fe92fb6cac45
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:34c3af6f3adec49d0d0a6e9b544ac580b2e3a13bb94ed7476a3711d95a9ef6c6
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1705ef3a08286669695bd3c71729dc6c292514e0dbba93fac60bc2587d6740cc
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:67b2f832bdf633a8fbe2bd9786fa9777f8898f5039f16a5b1ec23b42e8ba211f
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:27ebf93c877c54673a5776fbfe0e2f8ca99926be562e6e5f55038ce1f0e80b42
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f34e97a6907c815b8e7fd063af05c447efd3bc0a06dcb158bac0d08e94243947
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9893066574680924,
   "eval_steps": 500,
-  "global_step": 717,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5026,6 +5026,55 @@
       "learning_rate": 3.10471474516183e-08,
       "loss": 9.782,
       "step": 717
     }
   ],
   "logging_steps": 1,
@@ -5040,12 +5089,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 595349320237056.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9989651604001379,
   "eval_steps": 500,
+  "global_step": 724,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.10471474516183e-08,
       "loss": 9.782,
       "step": 717
+    },
+    {
+      "epoch": 0.9906864436012418,
+      "grad_norm": 1.1444123983383179,
+      "learning_rate": 2.2810775523329773e-08,
+      "loss": 9.7736,
+      "step": 718
+    },
+    {
+      "epoch": 0.9920662297343912,
+      "grad_norm": 1.2123403549194336,
+      "learning_rate": 1.5841184405462895e-08,
+      "loss": 9.7574,
+      "step": 719
+    },
+    {
+      "epoch": 0.9934460158675406,
+      "grad_norm": 1.313451886177063,
+      "learning_rate": 1.0138550757493592e-08,
+      "loss": 9.757,
+      "step": 720
+    },
+    {
+      "epoch": 0.9948258020006899,
+      "grad_norm": 1.2594588994979858,
+      "learning_rate": 5.703019125102849e-09,
+      "loss": 9.744,
+      "step": 721
+    },
+    {
+      "epoch": 0.9962055881338393,
+      "grad_norm": 1.3528109788894653,
+      "learning_rate": 2.534701936512951e-09,
+      "loss": 9.7332,
+      "step": 722
+    },
+    {
+      "epoch": 0.9975853742669886,
+      "grad_norm": 1.4873722791671753,
+      "learning_rate": 6.336794996231188e-10,
+      "loss": 9.7142,
+      "step": 723
+    },
+    {
+      "epoch": 0.9989651604001379,
+      "grad_norm": 1.4233864545822144,
+      "learning_rate": 0.0,
+      "loss": 9.7126,
+      "step": 724
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 601161656696832.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null