Training in progress, step 71, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +151 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab5bc6acbaa8ce95951d9bc086362d051cc671a95313f5263e293f09563b24f6
 size 100966336

 version https://git-lfs.github.com/spec/v1
+oid sha256:54f7e055eb33b1e56553e838f11135dcbc4f445604f5a5b8fa06c6cb80253b26
 size 100966336

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ba33436ce0473f6e5f1afd066d593756f52463c0ddb72163b0227b811433b65
 size 202110330

 version https://git-lfs.github.com/spec/v1
+oid sha256:32eb42a97fb1e7b594cdc74ce0a52f9769c32dd1d2713884200d3b8d74145217
 size 202110330

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f119cea98f2100accccefa6d8af5e4ec296b0150fc5a87b2ab91e8da89a8400
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:68ca88233e9cb44649a59652bc1e8ba8a3c58ab626c29c5d9dbab0e2ee670523
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0316daa00155cf81bfc0aa1ce434a1e7b984b9442dc1e55546a86795985439a3
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f58f60c4ba500d392cc8d38ad7b8f0fd25342cba11e7d79f1fbba873c020ffe0
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.8514222502708435,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.5376344086021505,
   "eval_steps": 25,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -381,6 +381,153 @@
       "eval_samples_per_second": 19.552,
       "eval_steps_per_second": 3.519,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -404,12 +551,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.992386062319616e+16,
   "train_batch_size": 6,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.8514222502708435,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.7634408602150538,
   "eval_steps": 25,
+  "global_step": 71,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 19.552,
       "eval_steps_per_second": 3.519,
       "step": 50
+    },
+    {
+      "epoch": 0.5483870967741935,
+      "grad_norm": 0.9954845309257507,
+      "learning_rate": 2.740299623723276e-05,
+      "loss": 0.7883,
+      "step": 51
+    },
+    {
+      "epoch": 0.5591397849462365,
+      "grad_norm": 0.9178506135940552,
+      "learning_rate": 2.5813805974175988e-05,
+      "loss": 0.8541,
+      "step": 52
+    },
+    {
+      "epoch": 0.5698924731182796,
+      "grad_norm": 1.0208925008773804,
+      "learning_rate": 2.4285108555160577e-05,
+      "loss": 0.7694,
+      "step": 53
+    },
+    {
+      "epoch": 0.5806451612903226,
+      "grad_norm": 1.0292917490005493,
+      "learning_rate": 2.2820072439029525e-05,
+      "loss": 0.8974,
+      "step": 54
+    },
+    {
+      "epoch": 0.5913978494623656,
+      "grad_norm": 1.062528133392334,
+      "learning_rate": 2.1421734136854156e-05,
+      "loss": 0.8796,
+      "step": 55
+    },
+    {
+      "epoch": 0.6021505376344086,
+      "grad_norm": 0.994775652885437,
+      "learning_rate": 2.0092991918301108e-05,
+      "loss": 0.7694,
+      "step": 56
+    },
+    {
+      "epoch": 0.6129032258064516,
+      "grad_norm": 0.9980999231338501,
+      "learning_rate": 1.883659980452598e-05,
+      "loss": 0.8256,
+      "step": 57
+    },
+    {
+      "epoch": 0.6236559139784946,
+      "grad_norm": 0.891745924949646,
+      "learning_rate": 1.765516186004387e-05,
+      "loss": 0.7038,
+      "step": 58
+    },
+    {
+      "epoch": 0.6344086021505376,
+      "grad_norm": 1.0422624349594116,
+      "learning_rate": 1.6551126795408016e-05,
+      "loss": 0.8348,
+      "step": 59
+    },
+    {
+      "epoch": 0.6451612903225806,
+      "grad_norm": 0.9354367852210999,
+      "learning_rate": 1.552678289188326e-05,
+      "loss": 0.7824,
+      "step": 60
+    },
+    {
+      "epoch": 0.6559139784946236,
+      "grad_norm": 1.0644829273223877,
+      "learning_rate": 1.4584253258633682e-05,
+      "loss": 0.8215,
+      "step": 61
+    },
+    {
+      "epoch": 0.6666666666666666,
+      "grad_norm": 0.8735445141792297,
+      "learning_rate": 1.3725491432254624e-05,
+      "loss": 0.7796,
+      "step": 62
+    },
+    {
+      "epoch": 0.6774193548387096,
+      "grad_norm": 0.9756590723991394,
+      "learning_rate": 1.2952277327769804e-05,
+      "loss": 0.7496,
+      "step": 63
+    },
+    {
+      "epoch": 0.6881720430107527,
+      "grad_norm": 1.0405932664871216,
+      "learning_rate": 1.2266213549485638e-05,
+      "loss": 0.7398,
+      "step": 64
+    },
+    {
+      "epoch": 0.6989247311827957,
+      "grad_norm": 1.0412120819091797,
+      "learning_rate": 1.1668722069349041e-05,
+      "loss": 0.7613,
+      "step": 65
+    },
+    {
+      "epoch": 0.7096774193548387,
+      "grad_norm": 1.0129188299179077,
+      "learning_rate": 1.1161041279693446e-05,
+      "loss": 0.8097,
+      "step": 66
+    },
+    {
+      "epoch": 0.7204301075268817,
+      "grad_norm": 1.0584628582000732,
+      "learning_rate": 1.074422342648161e-05,
+      "loss": 0.877,
+      "step": 67
+    },
+    {
+      "epoch": 0.7311827956989247,
+      "grad_norm": 1.030712366104126,
+      "learning_rate": 1.0419132428365116e-05,
+      "loss": 0.8288,
+      "step": 68
+    },
+    {
+      "epoch": 0.7419354838709677,
+      "grad_norm": 1.2178075313568115,
+      "learning_rate": 1.0186442086081093e-05,
+      "loss": 0.8672,
+      "step": 69
+    },
+    {
+      "epoch": 0.7526881720430108,
+      "grad_norm": 1.0627042055130005,
+      "learning_rate": 1.0046634685897261e-05,
+      "loss": 0.9266,
+      "step": 70
+    },
+    {
+      "epoch": 0.7634408602150538,
+      "grad_norm": 0.9612053632736206,
+      "learning_rate": 1e-05,
+      "loss": 0.8116,
+      "step": 71
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.8284068806262784e+16,
   "train_batch_size": 6,
   "trial_name": null,
   "trial_params": null