Training in progress, step 120, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +46 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:266cd94871718a986e4ce23c183790fc4b0de8cc9aa2104fde09808eb0885d75
 size 1001465824

 version https://git-lfs.github.com/spec/v1
+oid sha256:03e6b3e1d595414a7d7c782b65a9be536e02782b01c4d1af44ba2aceb43834f0
 size 1001465824

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1e33311b799b7918ecff9dc9f8b578379d48f6bda91cf68adc1a95d45266093b
 size 509176980

 version https://git-lfs.github.com/spec/v1
+oid sha256:14c6db5f642cc4524ca1d733bd074dee4ddbfa7832cd101cbbb5fb6b5db1d572
 size 509176980

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83b302bd10368c09cb19ba400e1fa8ceca162eab977031b0bc94e411bbd47746
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:878afb72930bbe35f3f76161250c8b6ec3eee322b00164ea89731e27a1aeb55c
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9042f02fbfca7c1dbbfe6d148e2a1de0ab7c9345d455fd2ba76f5d757c8ebcc0
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b978c4f5387d059752764238c4bee9571cf3df7a5c984a19baed3fed0a46071c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.8225108225108224,
   "eval_steps": 15,
-  "global_step": 105,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -316,6 +316,49 @@
       "eval_samples_per_second": 4.439,
       "eval_steps_per_second": 0.595,
       "step": 105
     }
   ],
   "logging_steps": 3,
@@ -335,7 +378,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.403225279902515e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.0865800865800868,
   "eval_steps": 15,
+  "global_step": 120,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 4.439,
       "eval_steps_per_second": 0.595,
       "step": 105
+    },
+    {
+      "epoch": 1.8744588744588744,
+      "grad_norm": 1.011114239692688,
+      "learning_rate": 3.43659629390117e-05,
+      "loss": 2.2031,
+      "step": 108
+    },
+    {
+      "epoch": 1.9264069264069263,
+      "grad_norm": 0.9879550933837891,
+      "learning_rate": 3.16475450103354e-05,
+      "loss": 2.1106,
+      "step": 111
+    },
+    {
+      "epoch": 1.9783549783549783,
+      "grad_norm": 1.0455985069274902,
+      "learning_rate": 2.899046657924992e-05,
+      "loss": 2.0031,
+      "step": 114
+    },
+    {
+      "epoch": 2.034632034632035,
+      "grad_norm": 1.1132123470306396,
+      "learning_rate": 2.6403608411631742e-05,
+      "loss": 1.843,
+      "step": 117
+    },
+    {
+      "epoch": 2.0865800865800868,
+      "grad_norm": 1.035132646560669,
+      "learning_rate": 2.389561657583681e-05,
+      "loss": 1.6831,
+      "step": 120
+    },
+    {
+      "epoch": 2.0865800865800868,
+      "eval_loss": 0.6776129603385925,
+      "eval_runtime": 21.8631,
+      "eval_samples_per_second": 4.437,
+      "eval_steps_per_second": 0.595,
+      "step": 120
     }
   ],
   "logging_steps": 3,
       "attributes": {}
     }
   },
+  "total_flos": 6.169594538911334e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null