Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:34d6164534de2fe3dd76f76fab754d967c5a1cc75854a61375769f8f229a84ce
 size 78207176

 version https://git-lfs.github.com/spec/v1
+oid sha256:29890fcc5e35dfec54e593f8f6d829f5ee8409a0c5893782d9507a2e9d24c7bf
 size 78207176

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f6282b434c8263f14c489886eb009a73c00f78873f55c72315077efd92a0e1b7
 size 156656510

 version https://git-lfs.github.com/spec/v1
+oid sha256:2a2185a62ddc5738ed2b63d1b79936ce327bb3a07607c595a526e514493baab8
 size 156656510

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0b41a52b97ddb68065be69b6fe530ce0d6a484a406f1702250b019f3cdfff6d5
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d9da2c11f86988b6e0985b751a6959206280cddecef26d7cb4589d6dbc816451
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfd59dca009004df561617f8f6994512d029a952a68609cac24b36df5a0757ce
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.9427156448364258,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.025673940949935817,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 38.682,
       "eval_steps_per_second": 19.341,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5784891945910272.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.942334532737732,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.03423192126658109,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 38.682,
       "eval_steps_per_second": 19.341,
       "step": 150
+    },
+    {
+      "epoch": 0.02584510055626872,
+      "grad_norm": 0.11141515523195267,
+      "learning_rate": 1.9136088935510362e-05,
+      "loss": 2.0646,
+      "step": 151
+    },
+    {
+      "epoch": 0.026016260162601626,
+      "grad_norm": 0.10801205039024353,
+      "learning_rate": 1.8414449687337464e-05,
+      "loss": 2.085,
+      "step": 152
+    },
+    {
+      "epoch": 0.02618741976893453,
+      "grad_norm": 0.09455177932977676,
+      "learning_rate": 1.7703596875660645e-05,
+      "loss": 1.9206,
+      "step": 153
+    },
+    {
+      "epoch": 0.026358579375267435,
+      "grad_norm": 0.08838347345590591,
+      "learning_rate": 1.700377325606388e-05,
+      "loss": 1.8645,
+      "step": 154
+    },
+    {
+      "epoch": 0.026529738981600343,
+      "grad_norm": 0.08785195648670197,
+      "learning_rate": 1.631521781767214e-05,
+      "loss": 1.7189,
+      "step": 155
+    },
+    {
+      "epoch": 0.026700898587933248,
+      "grad_norm": 0.09213719516992569,
+      "learning_rate": 1.5638165701536868e-05,
+      "loss": 1.6594,
+      "step": 156
+    },
+    {
+      "epoch": 0.026872058194266153,
+      "grad_norm": 0.09761328995227814,
+      "learning_rate": 1.4972848120335453e-05,
+      "loss": 1.8997,
+      "step": 157
+    },
+    {
+      "epoch": 0.027043217800599057,
+      "grad_norm": 0.09117046743631363,
+      "learning_rate": 1.4319492279412388e-05,
+      "loss": 1.8121,
+      "step": 158
+    },
+    {
+      "epoch": 0.027214377406931965,
+      "grad_norm": 0.08685458451509476,
+      "learning_rate": 1.3678321299188801e-05,
+      "loss": 1.8085,
+      "step": 159
+    },
+    {
+      "epoch": 0.02738553701326487,
+      "grad_norm": 0.09400554746389389,
+      "learning_rate": 1.3049554138967051e-05,
+      "loss": 1.859,
+      "step": 160
+    },
+    {
+      "epoch": 0.027556696619597774,
+      "grad_norm": 0.0903010442852974,
+      "learning_rate": 1.2433405522156332e-05,
+      "loss": 1.8646,
+      "step": 161
+    },
+    {
+      "epoch": 0.02772785622593068,
+      "grad_norm": 0.09318774938583374,
+      "learning_rate": 1.183008586294485e-05,
+      "loss": 1.9487,
+      "step": 162
+    },
+    {
+      "epoch": 0.027899015832263587,
+      "grad_norm": 0.08630430698394775,
+      "learning_rate": 1.1239801194443506e-05,
+      "loss": 1.7142,
+      "step": 163
+    },
+    {
+      "epoch": 0.028070175438596492,
+      "grad_norm": 0.09236445277929306,
+      "learning_rate": 1.066275309832584e-05,
+      "loss": 1.8608,
+      "step": 164
+    },
+    {
+      "epoch": 0.028241335044929396,
+      "grad_norm": 0.10190917551517487,
+      "learning_rate": 1.0099138635988026e-05,
+      "loss": 2.0014,
+      "step": 165
+    },
+    {
+      "epoch": 0.0284124946512623,
+      "grad_norm": 0.09969731420278549,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 1.8265,
+      "step": 166
+    },
+    {
+      "epoch": 0.02858365425759521,
+      "grad_norm": 0.09843458980321884,
+      "learning_rate": 9.012975854638949e-06,
+      "loss": 1.8552,
+      "step": 167
+    },
+    {
+      "epoch": 0.028754813863928114,
+      "grad_norm": 0.0932171493768692,
+      "learning_rate": 8.490798459222476e-06,
+      "loss": 1.8805,
+      "step": 168
+    },
+    {
+      "epoch": 0.02892597347026102,
+      "grad_norm": 0.09193925559520721,
+      "learning_rate": 7.982796418105371e-06,
+      "loss": 1.8708,
+      "step": 169
+    },
+    {
+      "epoch": 0.029097133076593923,
+      "grad_norm": 0.101052425801754,
+      "learning_rate": 7.489143213519301e-06,
+      "loss": 1.9583,
+      "step": 170
+    },
+    {
+      "epoch": 0.02926829268292683,
+      "grad_norm": 0.09988513588905334,
+      "learning_rate": 7.010007427581378e-06,
+      "loss": 1.9813,
+      "step": 171
+    },
+    {
+      "epoch": 0.029439452289259736,
+      "grad_norm": 0.09843932092189789,
+      "learning_rate": 6.5455526847235825e-06,
+      "loss": 1.9067,
+      "step": 172
+    },
+    {
+      "epoch": 0.02961061189559264,
+      "grad_norm": 0.0977838858962059,
+      "learning_rate": 6.0959375958151045e-06,
+      "loss": 1.8711,
+      "step": 173
+    },
+    {
+      "epoch": 0.029781771501925545,
+      "grad_norm": 0.10626049339771271,
+      "learning_rate": 5.6613157039969055e-06,
+      "loss": 2.0638,
+      "step": 174
+    },
+    {
+      "epoch": 0.02995293110825845,
+      "grad_norm": 0.09931275993585587,
+      "learning_rate": 5.241835432246889e-06,
+      "loss": 1.8416,
+      "step": 175
+    },
+    {
+      "epoch": 0.030124090714591358,
+      "grad_norm": 0.10600997507572174,
+      "learning_rate": 4.837640032693558e-06,
+      "loss": 1.9066,
+      "step": 176
+    },
+    {
+      "epoch": 0.030295250320924262,
+      "grad_norm": 0.10138809680938721,
+      "learning_rate": 4.448867537695578e-06,
+      "loss": 1.7798,
+      "step": 177
+    },
+    {
+      "epoch": 0.030466409927257167,
+      "grad_norm": 0.10285711288452148,
+      "learning_rate": 4.075650712703849e-06,
+      "loss": 1.9305,
+      "step": 178
+    },
+    {
+      "epoch": 0.03063756953359007,
+      "grad_norm": 0.10435103625059128,
+      "learning_rate": 3.71811701092219e-06,
+      "loss": 1.7811,
+      "step": 179
+    },
+    {
+      "epoch": 0.03080872913992298,
+      "grad_norm": 0.10847938060760498,
+      "learning_rate": 3.376388529782215e-06,
+      "loss": 1.9549,
+      "step": 180
+    },
+    {
+      "epoch": 0.030979888746255884,
+      "grad_norm": 0.10423749685287476,
+      "learning_rate": 3.0505819692471792e-06,
+      "loss": 1.7917,
+      "step": 181
+    },
+    {
+      "epoch": 0.03115104835258879,
+      "grad_norm": 0.11017204076051712,
+      "learning_rate": 2.7408085919590264e-06,
+      "loss": 1.9272,
+      "step": 182
+    },
+    {
+      "epoch": 0.0313222079589217,
+      "grad_norm": 0.11599629372358322,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 1.8393,
+      "step": 183
+    },
+    {
+      "epoch": 0.0314933675652546,
+      "grad_norm": 0.10840439796447754,
+      "learning_rate": 2.1697790249779636e-06,
+      "loss": 1.9099,
+      "step": 184
+    },
+    {
+      "epoch": 0.031664527171587506,
+      "grad_norm": 0.12284722179174423,
+      "learning_rate": 1.908717841359048e-06,
+      "loss": 2.0558,
+      "step": 185
+    },
+    {
+      "epoch": 0.03183568677792041,
+      "grad_norm": 0.11635725945234299,
+      "learning_rate": 1.6640797865406288e-06,
+      "loss": 1.9249,
+      "step": 186
+    },
+    {
+      "epoch": 0.032006846384253315,
+      "grad_norm": 0.1333126574754715,
+      "learning_rate": 1.4359484041943038e-06,
+      "loss": 1.9864,
+      "step": 187
+    },
+    {
+      "epoch": 0.03217800599058622,
+      "grad_norm": 0.13437993824481964,
+      "learning_rate": 1.2244016009781701e-06,
+      "loss": 1.9203,
+      "step": 188
+    },
+    {
+      "epoch": 0.032349165596919124,
+      "grad_norm": 0.12741129100322723,
+      "learning_rate": 1.0295116199317057e-06,
+      "loss": 1.8627,
+      "step": 189
+    },
+    {
+      "epoch": 0.032520325203252036,
+      "grad_norm": 0.13912999629974365,
+      "learning_rate": 8.513450158049108e-07,
+      "loss": 1.8976,
+      "step": 190
+    },
+    {
+      "epoch": 0.03269148480958494,
+      "grad_norm": 0.1495036631822586,
+      "learning_rate": 6.899626323298713e-07,
+      "loss": 1.9268,
+      "step": 191
+    },
+    {
+      "epoch": 0.032862644415917845,
+      "grad_norm": 0.14511996507644653,
+      "learning_rate": 5.454195814427021e-07,
+      "loss": 1.9895,
+      "step": 192
+    },
+    {
+      "epoch": 0.03303380402225075,
+      "grad_norm": 0.1583525389432907,
+      "learning_rate": 4.177652244628627e-07,
+      "loss": 2.0931,
+      "step": 193
+    },
+    {
+      "epoch": 0.033204963628583654,
+      "grad_norm": 0.16893316805362701,
+      "learning_rate": 3.0704315523631953e-07,
+      "loss": 1.9645,
+      "step": 194
+    },
+    {
+      "epoch": 0.03337612323491656,
+      "grad_norm": 0.19505704939365387,
+      "learning_rate": 2.1329118524827662e-07,
+      "loss": 1.6557,
+      "step": 195
+    },
+    {
+      "epoch": 0.033547282841249464,
+      "grad_norm": 0.20835180580615997,
+      "learning_rate": 1.3654133071059893e-07,
+      "loss": 1.7806,
+      "step": 196
+    },
+    {
+      "epoch": 0.03371844244758237,
+      "grad_norm": 0.23454757034778595,
+      "learning_rate": 7.681980162830282e-08,
+      "loss": 1.9318,
+      "step": 197
+    },
+    {
+      "epoch": 0.03388960205391527,
+      "grad_norm": 0.2649778425693512,
+      "learning_rate": 3.4146992848854695e-08,
+      "loss": 1.9983,
+      "step": 198
+    },
+    {
+      "epoch": 0.034060761660248184,
+      "grad_norm": 0.3988756537437439,
+      "learning_rate": 8.537477097364522e-09,
+      "loss": 2.2859,
+      "step": 199
+    },
+    {
+      "epoch": 0.03423192126658109,
+      "grad_norm": 0.5916358828544617,
+      "learning_rate": 0.0,
+      "loss": 2.5445,
+      "step": 200
+    },
+    {
+      "epoch": 0.03423192126658109,
+      "eval_loss": 1.942334532737732,
+      "eval_runtime": 253.457,
+      "eval_samples_per_second": 38.823,
+      "eval_steps_per_second": 19.412,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 7688467500761088.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null