Training in progress, step 71, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +151 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7fb37c7f1390fda4f25085cb4d2bdd978d0822a3651e127ead75cc2efae5c52f
 size 90207248

 version https://git-lfs.github.com/spec/v1
+oid sha256:789f732f6ae158363be51dcdbef6f8f96975791a165ff7c3b3464144afe7f3ca
 size 90207248

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ad17691905a0a147fcdaffd2f03ac91b04280e4cef606a4311393b76f6b18fc9
 size 180543866

 version https://git-lfs.github.com/spec/v1
+oid sha256:b18bdf4ee3f5f194463526ff846ab8b4a350a31a46ea6ff09f036fa51901b049
 size 180543866

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98283eb7aa86ea8a9a39a588a7edf0a1aac84a6c5eeea894980663b69e80fc0e
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:4d8ccd21da8722feb50ad01684192384ccf18d632b1f82a8d2d3bc2e16119623
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aec03bf87f279f0836b9767993a70ad750b638d565662d40d6a3ff55df0f361f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e683effdbf1801ca21c52b8334cbe1684c23fef6e33fad16cce510e4700ea65
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.127659574468085,
   "eval_steps": 25,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -381,6 +381,153 @@
       "eval_samples_per_second": 29.014,
       "eval_steps_per_second": 14.507,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -395,12 +542,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 9788035851878400.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 3.021276595744681,
   "eval_steps": 25,
+  "global_step": 71,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 29.014,
       "eval_steps_per_second": 14.507,
       "step": 50
+    },
+    {
+      "epoch": 2.1702127659574466,
+      "grad_norm": 4.252004623413086,
+      "learning_rate": 1.9868268181037185e-05,
+      "loss": 2.5753,
+      "step": 51
+    },
+    {
+      "epoch": 2.2127659574468086,
+      "grad_norm": 4.096494197845459,
+      "learning_rate": 1.8057659717401947e-05,
+      "loss": 2.25,
+      "step": 52
+    },
+    {
+      "epoch": 2.25531914893617,
+      "grad_norm": 2.2319579124450684,
+      "learning_rate": 1.631521781767214e-05,
+      "loss": 2.3128,
+      "step": 53
+    },
+    {
+      "epoch": 2.297872340425532,
+      "grad_norm": 2.539842367172241,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 2.2327,
+      "step": 54
+    },
+    {
+      "epoch": 2.3404255319148937,
+      "grad_norm": 3.147387981414795,
+      "learning_rate": 1.3049554138967051e-05,
+      "loss": 2.1849,
+      "step": 55
+    },
+    {
+      "epoch": 2.382978723404255,
+      "grad_norm": 3.685150146484375,
+      "learning_rate": 1.1533301450856054e-05,
+      "loss": 2.4277,
+      "step": 56
+    },
+    {
+      "epoch": 2.425531914893617,
+      "grad_norm": 4.715639591217041,
+      "learning_rate": 1.0099138635988026e-05,
+      "loss": 2.5731,
+      "step": 57
+    },
+    {
+      "epoch": 2.4680851063829787,
+      "grad_norm": 2.3089892864227295,
+      "learning_rate": 8.75012627008489e-06,
+      "loss": 2.4525,
+      "step": 58
+    },
+    {
+      "epoch": 2.5106382978723403,
+      "grad_norm": 2.630906105041504,
+      "learning_rate": 7.489143213519301e-06,
+      "loss": 2.1467,
+      "step": 59
+    },
+    {
+      "epoch": 2.5531914893617023,
+      "grad_norm": 3.5077598094940186,
+      "learning_rate": 6.318880467681526e-06,
+      "loss": 2.4428,
+      "step": 60
+    },
+    {
+      "epoch": 2.595744680851064,
+      "grad_norm": 3.928353786468506,
+      "learning_rate": 5.241835432246889e-06,
+      "loss": 2.2675,
+      "step": 61
+    },
+    {
+      "epoch": 2.6382978723404253,
+      "grad_norm": 4.511261940002441,
+      "learning_rate": 4.260306575598949e-06,
+      "loss": 2.2792,
+      "step": 62
+    },
+    {
+      "epoch": 2.6808510638297873,
+      "grad_norm": 2.4075710773468018,
+      "learning_rate": 3.376388529782215e-06,
+      "loss": 2.3584,
+      "step": 63
+    },
+    {
+      "epoch": 2.723404255319149,
+      "grad_norm": 2.83492112159729,
+      "learning_rate": 2.591967620451707e-06,
+      "loss": 1.9989,
+      "step": 64
+    },
+    {
+      "epoch": 2.7659574468085104,
+      "grad_norm": 3.5723989009857178,
+      "learning_rate": 1.908717841359048e-06,
+      "loss": 2.2559,
+      "step": 65
+    },
+    {
+      "epoch": 2.8085106382978724,
+      "grad_norm": 3.7944796085357666,
+      "learning_rate": 1.328097281965357e-06,
+      "loss": 2.2629,
+      "step": 66
+    },
+    {
+      "epoch": 2.851063829787234,
+      "grad_norm": 4.501901626586914,
+      "learning_rate": 8.513450158049108e-07,
+      "loss": 2.3413,
+      "step": 67
+    },
+    {
+      "epoch": 2.8936170212765955,
+      "grad_norm": 2.496269464492798,
+      "learning_rate": 4.794784562397458e-07,
+      "loss": 2.2531,
+      "step": 68
+    },
+    {
+      "epoch": 2.9361702127659575,
+      "grad_norm": 2.9496850967407227,
+      "learning_rate": 2.1329118524827662e-07,
+      "loss": 2.1338,
+      "step": 69
+    },
+    {
+      "epoch": 2.978723404255319,
+      "grad_norm": 4.252143383026123,
+      "learning_rate": 5.3351259881379014e-08,
+      "loss": 2.4166,
+      "step": 70
+    },
+    {
+      "epoch": 3.021276595744681,
+      "grad_norm": 7.078902721405029,
+      "learning_rate": 0.0,
+      "loss": 4.2177,
+      "step": 71
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.3899010909667328e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null