Training in progress, step 494, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +116 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9fd669b36c142db3b5ac9018afcce2cd8dcdd67a8f0aae5ba8ddc72a8bca0121
 size 4102239984

 version https://git-lfs.github.com/spec/v1
+oid sha256:fff17ef401d6e6ad527e825d1eafb7c8fd1e90b6f724e50fa522778c68104c9c
 size 4102239984

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:405b57b437f7310eb637920c1f9993e3c54797e99626d6cb1ee7955789ea5cae
 size 8204830696

 version https://git-lfs.github.com/spec/v1
+oid sha256:343c6b4955f08eef048f3d7f9a6f80d0d448a7ab6cf7b58f095c7bd4723bdeb2
 size 8204830696

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8a5d538bb0eaa69aecbaf9562eddfe3aac9c94b0e90b5825e6721ab12978dc1
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:aad7d87d2cae58a23d1b2313ff708823206e2a45a1d1364fc765c5d30d52892d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:50bf10ab16acdf754678aad1e5a7a0f326946162adfbb22565a648cfbb9b4bdb
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ca23e5c877c9d2e3eb941aadb9b0bac9e08da5e990c782ceb720152ffa5e60a9
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.966144517433047,
   "eval_steps": 500,
-  "global_step": 478,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3353,6 +3353,118 @@
       "learning_rate": 4.0634802034176244e-07,
       "loss": 1.3784,
       "step": 478
     }
   ],
   "logging_steps": 1,
@@ -3367,12 +3479,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.1989875311968256e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9984840828701365,
   "eval_steps": 500,
+  "global_step": 494,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 4.0634802034176244e-07,
       "loss": 1.3784,
       "step": 478
+    },
+    {
+      "epoch": 0.9681657402728651,
+      "grad_norm": 5.375519275665283,
+      "learning_rate": 3.572004926893413e-07,
+      "loss": 1.5642,
+      "step": 479
+    },
+    {
+      "epoch": 0.9701869631126832,
+      "grad_norm": 5.717057704925537,
+      "learning_rate": 3.112091414176621e-07,
+      "loss": 1.7185,
+      "step": 480
+    },
+    {
+      "epoch": 0.9722081859525012,
+      "grad_norm": 4.669751167297363,
+      "learning_rate": 2.6837689055232426e-07,
+      "loss": 1.2948,
+      "step": 481
+    },
+    {
+      "epoch": 0.9742294087923193,
+      "grad_norm": 4.622890949249268,
+      "learning_rate": 2.287064632705005e-07,
+      "loss": 1.1733,
+      "step": 482
+    },
+    {
+      "epoch": 0.9762506316321374,
+      "grad_norm": 4.939849376678467,
+      "learning_rate": 1.9220038172780842e-07,
+      "loss": 1.3322,
+      "step": 483
+    },
+    {
+      "epoch": 0.9782718544719555,
+      "grad_norm": 5.243642330169678,
+      "learning_rate": 1.588609668979446e-07,
+      "loss": 1.2098,
+      "step": 484
+    },
+    {
+      "epoch": 0.9802930773117736,
+      "grad_norm": 5.232309818267822,
+      "learning_rate": 1.286903384251581e-07,
+      "loss": 1.2723,
+      "step": 485
+    },
+    {
+      "epoch": 0.9823143001515917,
+      "grad_norm": 6.266340732574463,
+      "learning_rate": 1.0169041448943039e-07,
+      "loss": 1.2856,
+      "step": 486
+    },
+    {
+      "epoch": 0.9843355229914098,
+      "grad_norm": 6.7853851318359375,
+      "learning_rate": 7.78629116845786e-08,
+      "loss": 1.2888,
+      "step": 487
+    },
+    {
+      "epoch": 0.9863567458312279,
+      "grad_norm": 5.390272617340088,
+      "learning_rate": 5.7209344909076036e-08,
+      "loss": 1.271,
+      "step": 488
+    },
+    {
+      "epoch": 0.988377968671046,
+      "grad_norm": 5.515243053436279,
+      "learning_rate": 3.973102726976819e-08,
+      "loss": 1.1683,
+      "step": 489
+    },
+    {
+      "epoch": 0.9903991915108641,
+      "grad_norm": 5.492679595947266,
+      "learning_rate": 2.542906999836725e-08,
+      "loss": 1.2462,
+      "step": 490
+    },
+    {
+      "epoch": 0.9924204143506822,
+      "grad_norm": 5.038405418395996,
+      "learning_rate": 1.4304382380819769e-08,
+      "loss": 1.1106,
+      "step": 491
+    },
+    {
+      "epoch": 0.9944416371905003,
+      "grad_norm": 5.300425052642822,
+      "learning_rate": 6.357671699486201e-09,
+      "loss": 1.1875,
+      "step": 492
+    },
+    {
+      "epoch": 0.9964628600303184,
+      "grad_norm": 5.133315086364746,
+      "learning_rate": 1.5894431881657845e-09,
+      "loss": 1.1627,
+      "step": 493
+    },
+    {
+      "epoch": 0.9984840828701365,
+      "grad_norm": 5.80331563949585,
+      "learning_rate": 0.0,
+      "loss": 1.1667,
+      "step": 494
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.2725168948314112e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null