Training in progress, step 50, checkpoint

Browse files

Files changed (7) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +188 -5

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:be592ce1f3853fe0f4fccd4aff263082538030d4979aa3ff2d4a02d97f639e69
 size 202110330

 version https://git-lfs.github.com/spec/v1
+oid sha256:cc395991651c0da9e6d7e22177fee3d27127afeec15a9889e45bdf6bfdfa9e81
 size 202110330

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ba1b0869ff97188abacf4491d24d613a0836b9d45d229331cd9582aa6be709c
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:8e93a3c4f009bbdf13d622d562686a18b2f1e12bbf2332725a6803e7d05ec23f
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:455753603a55395aa451b37d3949e99554034ffce0a27b34a4d05bd0a1c3b648
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:87e63a0b31275baba59dd032050acf21d5a2dcd0d2b8f732689cd95b0f040f78
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96212a13faf1e4c0b05ad866326992d8f175dbf4dce7054b726a0442f33423db
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:a78ba22bbaa36bb3081bb8cd1ef6eb9ea478d89c6f26cc84a59a7982d92335fe
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9d5d5a3cfaf49445f355dc161fff3f6d89f12384479cfa4d8036ac01912906cc
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:8f08bd9344a2a1e1b49eb6ed7b8960517b08d1b295c8e2191f3b31c36c0d10aa
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:107607e336ff02e6e3fc135f6b55fc089901c7172808564e7046a8e836a537c9
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:fd8b969ce6a3f06e5c06504065f43d471f4f8e360f2eb91a5068e5fed0b0d9dc
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": NaN,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
-  "epoch": 0.06991173643275365,
   "eval_steps": 25,
-  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 99.502,
       "eval_steps_per_second": 25.871,
       "step": 25
     }
   ],
   "logging_steps": 1,
@@ -212,7 +395,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -221,12 +404,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.16708196040704e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": NaN,
   "best_model_checkpoint": "miner_id_24/checkpoint-25",
+  "epoch": 0.1398234728655073,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 99.502,
       "eval_steps_per_second": 25.871,
       "step": 25
+    },
+    {
+      "epoch": 0.0727082058900638,
+      "grad_norm": NaN,
+      "learning_rate": 8.957804998855866e-05,
+      "loss": 0.0,
+      "step": 26
+    },
+    {
+      "epoch": 0.07550467534737394,
+      "grad_norm": NaN,
+      "learning_rate": 8.860696646428693e-05,
+      "loss": 0.0,
+      "step": 27
+    },
+    {
+      "epoch": 0.07830114480468409,
+      "grad_norm": NaN,
+      "learning_rate": 8.759913421559902e-05,
+      "loss": 0.0,
+      "step": 28
+    },
+    {
+      "epoch": 0.08109761426199423,
+      "grad_norm": NaN,
+      "learning_rate": 8.655565529226198e-05,
+      "loss": 0.0,
+      "step": 29
+    },
+    {
+      "epoch": 0.08389408371930437,
+      "grad_norm": NaN,
+      "learning_rate": 8.547767072315835e-05,
+      "loss": 0.0,
+      "step": 30
+    },
+    {
+      "epoch": 0.08669055317661452,
+      "grad_norm": NaN,
+      "learning_rate": 8.436635926858759e-05,
+      "loss": 0.0,
+      "step": 31
+    },
+    {
+      "epoch": 0.08948702263392468,
+      "grad_norm": NaN,
+      "learning_rate": 8.322293613130917e-05,
+      "loss": 0.0,
+      "step": 32
+    },
+    {
+      "epoch": 0.09228349209123482,
+      "grad_norm": NaN,
+      "learning_rate": 8.204865162773613e-05,
+      "loss": 0.0,
+      "step": 33
+    },
+    {
+      "epoch": 0.09507996154854496,
+      "grad_norm": NaN,
+      "learning_rate": 8.084478982073247e-05,
+      "loss": 0.0,
+      "step": 34
+    },
+    {
+      "epoch": 0.09787643100585511,
+      "grad_norm": NaN,
+      "learning_rate": 7.961266711550922e-05,
+      "loss": 0.0,
+      "step": 35
+    },
+    {
+      "epoch": 0.10067290046316525,
+      "grad_norm": NaN,
+      "learning_rate": 7.835363082015468e-05,
+      "loss": 0.0,
+      "step": 36
+    },
+    {
+      "epoch": 0.1034693699204754,
+      "grad_norm": NaN,
+      "learning_rate": 7.706905767237288e-05,
+      "loss": 0.0,
+      "step": 37
+    },
+    {
+      "epoch": 0.10626583937778554,
+      "grad_norm": NaN,
+      "learning_rate": 7.576035233404096e-05,
+      "loss": 0.0,
+      "step": 38
+    },
+    {
+      "epoch": 0.1090623088350957,
+      "grad_norm": NaN,
+      "learning_rate": 7.442894585523218e-05,
+      "loss": 0.0,
+      "step": 39
+    },
+    {
+      "epoch": 0.11185877829240584,
+      "grad_norm": NaN,
+      "learning_rate": 7.307629410938363e-05,
+      "loss": 0.0,
+      "step": 40
+    },
+    {
+      "epoch": 0.11465524774971599,
+      "grad_norm": NaN,
+      "learning_rate": 7.170387620131993e-05,
+      "loss": 0.0,
+      "step": 41
+    },
+    {
+      "epoch": 0.11745171720702613,
+      "grad_norm": NaN,
+      "learning_rate": 7.031319284987394e-05,
+      "loss": 0.0,
+      "step": 42
+    },
+    {
+      "epoch": 0.12024818666433627,
+      "grad_norm": NaN,
+      "learning_rate": 6.890576474687263e-05,
+      "loss": 0.0,
+      "step": 43
+    },
+    {
+      "epoch": 0.12304465612164642,
+      "grad_norm": NaN,
+      "learning_rate": 6.7483130894283e-05,
+      "loss": 0.0,
+      "step": 44
+    },
+    {
+      "epoch": 0.12584112557895658,
+      "grad_norm": NaN,
+      "learning_rate": 6.604684692133597e-05,
+      "loss": 0.0,
+      "step": 45
+    },
+    {
+      "epoch": 0.1286375950362667,
+      "grad_norm": NaN,
+      "learning_rate": 6.459848338346861e-05,
+      "loss": 0.0,
+      "step": 46
+    },
+    {
+      "epoch": 0.13143406449357686,
+      "grad_norm": NaN,
+      "learning_rate": 6.313962404494496e-05,
+      "loss": 0.0,
+      "step": 47
+    },
+    {
+      "epoch": 0.134230533950887,
+      "grad_norm": NaN,
+      "learning_rate": 6.167186414703289e-05,
+      "loss": 0.0,
+      "step": 48
+    },
+    {
+      "epoch": 0.13702700340819715,
+      "grad_norm": NaN,
+      "learning_rate": 6.019680866363139e-05,
+      "loss": 0.0,
+      "step": 49
+    },
+    {
+      "epoch": 0.1398234728655073,
+      "grad_norm": NaN,
+      "learning_rate": 5.8716070546254966e-05,
+      "loss": 0.0,
+      "step": 50
+    },
+    {
+      "epoch": 0.1398234728655073,
+      "eval_loss": NaN,
+      "eval_runtime": 0.502,
+      "eval_samples_per_second": 99.607,
+      "eval_steps_per_second": 25.898,
+      "step": 50
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 8.33416392081408e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null