Training in progress, step 50, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +186 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7fe7374d0b8ac716b8507ecf9270eea562cfbc457c14f41482217b18e43d686f
 size 200068512

 version https://git-lfs.github.com/spec/v1
+oid sha256:7ad0941e19d8f3e16b529706a196d576f1dc861878473c9e0edc50ff839cc232
 size 200068512

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c3a27f321751760857d35e93c684fbe0f1c9cf4fd0a51ecb365d01127d8b09b4
 size 400361770

 version https://git-lfs.github.com/spec/v1
+oid sha256:c26c475c7e3dddeffe4e842b10dad31ff7ea78176b5573f3765b8194a0a479d2
 size 400361770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0df8ed7adb1d364c823c2646d01c37a507149e968965e3614050fb718a783053
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c367127b1753877d85f6ae4547bc0da699d3dd6cb78fdd0fb4ec5a4d37b4d04a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5f7e91af1a79d7676619e959cc4b5dde54a9e2ec825d24c1c34fe5387a27894f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d914dc409ae93ac75985631a9974025ebaa397f621307eef226e7d6e1076a1f6
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.028216704288939052,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 3.387,
       "eval_steps_per_second": 1.696,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -217,7 +400,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.83447180771328e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.056433408577878104,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 3.387,
       "eval_steps_per_second": 1.696,
       "step": 25
+    },
+    {
+      "epoch": 0.029345372460496615,
+      "grad_norm": 5.593110084533691,
+      "learning_rate": 0.0001537299608346824,
+      "loss": 1.0647,
+      "step": 26
+    },
+    {
+      "epoch": 0.030474040632054177,
+      "grad_norm": 9.355537414550781,
+      "learning_rate": 0.00015000000000000001,
+      "loss": 1.5519,
+      "step": 27
+    },
+    {
+      "epoch": 0.03160270880361174,
+      "grad_norm": 10.389217376708984,
+      "learning_rate": 0.00014617486132350343,
+      "loss": 1.8895,
+      "step": 28
+    },
+    {
+      "epoch": 0.0327313769751693,
+      "grad_norm": 9.101856231689453,
+      "learning_rate": 0.00014226182617406996,
+      "loss": 1.7764,
+      "step": 29
+    },
+    {
+      "epoch": 0.033860045146726865,
+      "grad_norm": 9.08023738861084,
+      "learning_rate": 0.000138268343236509,
+      "loss": 1.7249,
+      "step": 30
+    },
+    {
+      "epoch": 0.034988713318284424,
+      "grad_norm": 8.803888320922852,
+      "learning_rate": 0.00013420201433256689,
+      "loss": 1.5349,
+      "step": 31
+    },
+    {
+      "epoch": 0.03611738148984198,
+      "grad_norm": 9.191908836364746,
+      "learning_rate": 0.00013007057995042732,
+      "loss": 1.8036,
+      "step": 32
+    },
+    {
+      "epoch": 0.03724604966139955,
+      "grad_norm": 7.046676158905029,
+      "learning_rate": 0.00012588190451025207,
+      "loss": 1.3115,
+      "step": 33
+    },
+    {
+      "epoch": 0.03837471783295711,
+      "grad_norm": 8.129071235656738,
+      "learning_rate": 0.00012164396139381029,
+      "loss": 1.7446,
+      "step": 34
+    },
+    {
+      "epoch": 0.039503386004514675,
+      "grad_norm": 7.32548189163208,
+      "learning_rate": 0.00011736481776669306,
+      "loss": 1.2946,
+      "step": 35
+    },
+    {
+      "epoch": 0.040632054176072234,
+      "grad_norm": 8.391158103942871,
+      "learning_rate": 0.00011305261922200519,
+      "loss": 1.6682,
+      "step": 36
+    },
+    {
+      "epoch": 0.0417607223476298,
+      "grad_norm": 9.171820640563965,
+      "learning_rate": 0.00010871557427476583,
+      "loss": 1.6177,
+      "step": 37
+    },
+    {
+      "epoch": 0.04288939051918736,
+      "grad_norm": 8.600780487060547,
+      "learning_rate": 0.00010436193873653361,
+      "loss": 2.033,
+      "step": 38
+    },
+    {
+      "epoch": 0.04401805869074492,
+      "grad_norm": 7.288829326629639,
+      "learning_rate": 0.0001,
+      "loss": 1.6004,
+      "step": 39
+    },
+    {
+      "epoch": 0.045146726862302484,
+      "grad_norm": 10.171875953674316,
+      "learning_rate": 9.563806126346642e-05,
+      "loss": 1.6914,
+      "step": 40
+    },
+    {
+      "epoch": 0.046275395033860044,
+      "grad_norm": 5.748844623565674,
+      "learning_rate": 9.128442572523417e-05,
+      "loss": 1.0927,
+      "step": 41
+    },
+    {
+      "epoch": 0.04740406320541761,
+      "grad_norm": 7.213958263397217,
+      "learning_rate": 8.694738077799488e-05,
+      "loss": 1.536,
+      "step": 42
+    },
+    {
+      "epoch": 0.04853273137697517,
+      "grad_norm": 8.207345962524414,
+      "learning_rate": 8.263518223330697e-05,
+      "loss": 1.4197,
+      "step": 43
+    },
+    {
+      "epoch": 0.04966139954853273,
+      "grad_norm": 9.604900360107422,
+      "learning_rate": 7.835603860618972e-05,
+      "loss": 1.743,
+      "step": 44
+    },
+    {
+      "epoch": 0.050790067720090294,
+      "grad_norm": 7.322094440460205,
+      "learning_rate": 7.411809548974792e-05,
+      "loss": 1.522,
+      "step": 45
+    },
+    {
+      "epoch": 0.05191873589164785,
+      "grad_norm": 8.541365623474121,
+      "learning_rate": 6.992942004957271e-05,
+      "loss": 1.1983,
+      "step": 46
+    },
+    {
+      "epoch": 0.05304740406320542,
+      "grad_norm": 7.738641262054443,
+      "learning_rate": 6.579798566743314e-05,
+      "loss": 1.1686,
+      "step": 47
+    },
+    {
+      "epoch": 0.05417607223476298,
+      "grad_norm": 9.32544231414795,
+      "learning_rate": 6.173165676349103e-05,
+      "loss": 1.9263,
+      "step": 48
+    },
+    {
+      "epoch": 0.055304740406320545,
+      "grad_norm": 9.188823699951172,
+      "learning_rate": 5.773817382593008e-05,
+      "loss": 1.7528,
+      "step": 49
+    },
+    {
+      "epoch": 0.056433408577878104,
+      "grad_norm": 6.969271183013916,
+      "learning_rate": 5.382513867649663e-05,
+      "loss": 1.4092,
+      "step": 50
+    },
+    {
+      "epoch": 0.056433408577878104,
+      "eval_loss": 1.6539942026138306,
+      "eval_runtime": 220.8995,
+      "eval_samples_per_second": 3.382,
+      "eval_steps_per_second": 1.693,
+      "step": 50
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 7.66894361542656e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null