Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:45a210e18f963e9364551fa2d459b87dd03e5eac7e046b3b0a235acfa7125416
 size 295488936

 version https://git-lfs.github.com/spec/v1
+oid sha256:16ffa2061f86ef65086dbc235364a3c77205cc5c9bbe3e549b10efc8a660cc80
 size 295488936

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9e8becf4a15cbc3a9497ac10dde0131e5776cfd4e9c635106f4b8972c6166e6c
 size 591203178

 version https://git-lfs.github.com/spec/v1
+oid sha256:022cb2be1e1178cd90c20fe1a5ebcc954a5a2099479d35e39e1796566f355317
 size 591203178

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0ec23ceedda3600099b07b897f20df8cec807c62901f82ff73b0355ccf02a336
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:abb1df1de9a1ef560f024ee749e11f0224911ef63f7e8197f011275b9864b8b4
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ece3236edcb393fe9fe067fb4c27aaaf2d1a125595517cb84b2456d9b62475c0
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:49d60a69e2379be2053e816cbaff31e6c931b5922dd86c71c9eaf473299cbf62
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.8629980087280273,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.00816293212521938,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 28.374,
       "eval_steps_per_second": 14.187,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.367527271694336e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 2.735139846801758,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.01632586425043876,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 28.374,
       "eval_steps_per_second": 14.187,
       "step": 50
+    },
+    {
+      "epoch": 0.008326190767723767,
+      "grad_norm": 2.1676950454711914,
+      "learning_rate": 7.938926261462366e-05,
+      "loss": 2.8331,
+      "step": 51
+    },
+    {
+      "epoch": 0.008489449410228153,
+      "grad_norm": 1.6274971961975098,
+      "learning_rate": 7.754484907260513e-05,
+      "loss": 2.6935,
+      "step": 52
+    },
+    {
+      "epoch": 0.00865270805273254,
+      "grad_norm": 1.3204303979873657,
+      "learning_rate": 7.564496387029532e-05,
+      "loss": 2.797,
+      "step": 53
+    },
+    {
+      "epoch": 0.008815966695236929,
+      "grad_norm": 1.0049480199813843,
+      "learning_rate": 7.369343312364993e-05,
+      "loss": 3.0168,
+      "step": 54
+    },
+    {
+      "epoch": 0.008979225337741316,
+      "grad_norm": 1.075304388999939,
+      "learning_rate": 7.169418695587791e-05,
+      "loss": 3.2103,
+      "step": 55
+    },
+    {
+      "epoch": 0.009142483980245704,
+      "grad_norm": 0.8386501669883728,
+      "learning_rate": 6.965125158269619e-05,
+      "loss": 3.072,
+      "step": 56
+    },
+    {
+      "epoch": 0.009305742622750092,
+      "grad_norm": 0.8426527976989746,
+      "learning_rate": 6.756874120406714e-05,
+      "loss": 2.752,
+      "step": 57
+    },
+    {
+      "epoch": 0.00946900126525448,
+      "grad_norm": 0.8528467416763306,
+      "learning_rate": 6.545084971874738e-05,
+      "loss": 2.8089,
+      "step": 58
+    },
+    {
+      "epoch": 0.009632259907758867,
+      "grad_norm": 0.904664158821106,
+      "learning_rate": 6.330184227833376e-05,
+      "loss": 2.8771,
+      "step": 59
+    },
+    {
+      "epoch": 0.009795518550263255,
+      "grad_norm": 0.7004697322845459,
+      "learning_rate": 6.112604669781572e-05,
+      "loss": 2.6807,
+      "step": 60
+    },
+    {
+      "epoch": 0.009958777192767642,
+      "grad_norm": 0.7690675258636475,
+      "learning_rate": 5.8927844739931834e-05,
+      "loss": 2.9141,
+      "step": 61
+    },
+    {
+      "epoch": 0.01012203583527203,
+      "grad_norm": 0.7248542904853821,
+      "learning_rate": 5.6711663290882776e-05,
+      "loss": 2.8913,
+      "step": 62
+    },
+    {
+      "epoch": 0.010285294477776418,
+      "grad_norm": 0.617339015007019,
+      "learning_rate": 5.448196544517168e-05,
+      "loss": 2.8585,
+      "step": 63
+    },
+    {
+      "epoch": 0.010448553120280805,
+      "grad_norm": 0.7304785251617432,
+      "learning_rate": 5.2243241517525754e-05,
+      "loss": 2.6997,
+      "step": 64
+    },
+    {
+      "epoch": 0.010611811762785193,
+      "grad_norm": 0.8117228150367737,
+      "learning_rate": 5e-05,
+      "loss": 2.8935,
+      "step": 65
+    },
+    {
+      "epoch": 0.01077507040528958,
+      "grad_norm": 0.7086424827575684,
+      "learning_rate": 4.775675848247427e-05,
+      "loss": 2.969,
+      "step": 66
+    },
+    {
+      "epoch": 0.010938329047793967,
+      "grad_norm": 0.715423583984375,
+      "learning_rate": 4.551803455482833e-05,
+      "loss": 2.5869,
+      "step": 67
+    },
+    {
+      "epoch": 0.011101587690298355,
+      "grad_norm": 0.7727792263031006,
+      "learning_rate": 4.328833670911724e-05,
+      "loss": 2.8389,
+      "step": 68
+    },
+    {
+      "epoch": 0.011264846332802742,
+      "grad_norm": 0.6761236786842346,
+      "learning_rate": 4.107215526006817e-05,
+      "loss": 2.709,
+      "step": 69
+    },
+    {
+      "epoch": 0.01142810497530713,
+      "grad_norm": 0.7613105773925781,
+      "learning_rate": 3.887395330218429e-05,
+      "loss": 2.8757,
+      "step": 70
+    },
+    {
+      "epoch": 0.011591363617811518,
+      "grad_norm": 0.8669199347496033,
+      "learning_rate": 3.6698157721666246e-05,
+      "loss": 3.1531,
+      "step": 71
+    },
+    {
+      "epoch": 0.011754622260315905,
+      "grad_norm": 0.8836479783058167,
+      "learning_rate": 3.4549150281252636e-05,
+      "loss": 2.7458,
+      "step": 72
+    },
+    {
+      "epoch": 0.011917880902820293,
+      "grad_norm": 0.778107225894928,
+      "learning_rate": 3.243125879593286e-05,
+      "loss": 2.7683,
+      "step": 73
+    },
+    {
+      "epoch": 0.01208113954532468,
+      "grad_norm": 0.6686323285102844,
+      "learning_rate": 3.0348748417303823e-05,
+      "loss": 2.4992,
+      "step": 74
+    },
+    {
+      "epoch": 0.012244398187829068,
+      "grad_norm": 0.8155844807624817,
+      "learning_rate": 2.8305813044122097e-05,
+      "loss": 2.8288,
+      "step": 75
+    },
+    {
+      "epoch": 0.012407656830333456,
+      "grad_norm": 0.7905011773109436,
+      "learning_rate": 2.630656687635007e-05,
+      "loss": 2.7207,
+      "step": 76
+    },
+    {
+      "epoch": 0.012570915472837844,
+      "grad_norm": 0.821061372756958,
+      "learning_rate": 2.43550361297047e-05,
+      "loss": 2.6178,
+      "step": 77
+    },
+    {
+      "epoch": 0.012734174115342231,
+      "grad_norm": 0.9048423767089844,
+      "learning_rate": 2.245515092739488e-05,
+      "loss": 2.8052,
+      "step": 78
+    },
+    {
+      "epoch": 0.01289743275784662,
+      "grad_norm": 0.7810814380645752,
+      "learning_rate": 2.061073738537635e-05,
+      "loss": 2.9147,
+      "step": 79
+    },
+    {
+      "epoch": 0.013060691400351007,
+      "grad_norm": 0.887985348701477,
+      "learning_rate": 1.8825509907063327e-05,
+      "loss": 2.681,
+      "step": 80
+    },
+    {
+      "epoch": 0.013223950042855395,
+      "grad_norm": 0.8398605585098267,
+      "learning_rate": 1.7103063703014372e-05,
+      "loss": 2.7676,
+      "step": 81
+    },
+    {
+      "epoch": 0.01338720868535978,
+      "grad_norm": 0.8217864036560059,
+      "learning_rate": 1.544686755065677e-05,
+      "loss": 2.7269,
+      "step": 82
+    },
+    {
+      "epoch": 0.013550467327864168,
+      "grad_norm": 0.9799962639808655,
+      "learning_rate": 1.3860256808630428e-05,
+      "loss": 2.7334,
+      "step": 83
+    },
+    {
+      "epoch": 0.013713725970368556,
+      "grad_norm": 0.7503747344017029,
+      "learning_rate": 1.2346426699819458e-05,
+      "loss": 2.6707,
+      "step": 84
+    },
+    {
+      "epoch": 0.013876984612872944,
+      "grad_norm": 0.9742613434791565,
+      "learning_rate": 1.090842587659851e-05,
+      "loss": 2.6392,
+      "step": 85
+    },
+    {
+      "epoch": 0.014040243255377331,
+      "grad_norm": 0.7584743499755859,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 2.6515,
+      "step": 86
+    },
+    {
+      "epoch": 0.014203501897881719,
+      "grad_norm": 0.7804672718048096,
+      "learning_rate": 8.271337313934869e-06,
+      "loss": 2.4738,
+      "step": 87
+    },
+    {
+      "epoch": 0.014366760540386107,
+      "grad_norm": 0.8059239983558655,
+      "learning_rate": 7.077560319906695e-06,
+      "loss": 2.494,
+      "step": 88
+    },
+    {
+      "epoch": 0.014530019182890494,
+      "grad_norm": 0.9126543402671814,
+      "learning_rate": 5.9702234071631e-06,
+      "loss": 2.6029,
+      "step": 89
+    },
+    {
+      "epoch": 0.014693277825394882,
+      "grad_norm": 0.8912916779518127,
+      "learning_rate": 4.951556604879048e-06,
+      "loss": 2.5753,
+      "step": 90
+    },
+    {
+      "epoch": 0.01485653646789927,
+      "grad_norm": 0.9568765163421631,
+      "learning_rate": 4.023611372427471e-06,
+      "loss": 2.3926,
+      "step": 91
+    },
+    {
+      "epoch": 0.015019795110403657,
+      "grad_norm": 1.004371166229248,
+      "learning_rate": 3.18825646801314e-06,
+      "loss": 2.7712,
+      "step": 92
+    },
+    {
+      "epoch": 0.015183053752908045,
+      "grad_norm": 1.0826361179351807,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 2.6105,
+      "step": 93
+    },
+    {
+      "epoch": 0.015346312395412433,
+      "grad_norm": 0.9342916011810303,
+      "learning_rate": 1.8018569652073381e-06,
+      "loss": 2.3885,
+      "step": 94
+    },
+    {
+      "epoch": 0.01550957103791682,
+      "grad_norm": 1.158676028251648,
+      "learning_rate": 1.2536043909088191e-06,
+      "loss": 2.4456,
+      "step": 95
+    },
+    {
+      "epoch": 0.015672829680421208,
+      "grad_norm": 1.0513296127319336,
+      "learning_rate": 8.035205700685167e-07,
+      "loss": 2.2801,
+      "step": 96
+    },
+    {
+      "epoch": 0.015836088322925594,
+      "grad_norm": 1.4424023628234863,
+      "learning_rate": 4.52511911603265e-07,
+      "loss": 2.6424,
+      "step": 97
+    },
+    {
+      "epoch": 0.015999346965429984,
+      "grad_norm": 1.7463353872299194,
+      "learning_rate": 2.012853002380466e-07,
+      "loss": 2.3797,
+      "step": 98
+    },
+    {
+      "epoch": 0.01616260560793437,
+      "grad_norm": 2.0830276012420654,
+      "learning_rate": 5.0346672934270534e-08,
+      "loss": 2.5996,
+      "step": 99
+    },
+    {
+      "epoch": 0.01632586425043876,
+      "grad_norm": 2.8903799057006836,
+      "learning_rate": 0.0,
+      "loss": 2.8615,
+      "step": 100
+    },
+    {
+      "epoch": 0.01632586425043876,
+      "eval_loss": 2.735139846801758,
+      "eval_runtime": 363.8754,
+      "eval_samples_per_second": 28.35,
+      "eval_steps_per_second": 14.175,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.728250925121536e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null