Training in progress, step 200, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b1784586f8f2cf299656b6860e027c36ca5ade108b5113070ab9d4df6dafb77c
 size 78207176

 version https://git-lfs.github.com/spec/v1
+oid sha256:9a056c28dbaa8204549bc9b1cc5120028c87b3f1952a3c119fc97bdabf8f95d6
 size 78207176

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:153683b062e07ceeeeb702fe3403d0f80a3b68c6212f129587ce055b20d01d76
 size 156656510

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d82086c62bdc47893981d7b0667f6f92d6a61c20f4982bcd46b68f4295a4a4c
 size 156656510

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b32c9cbb0b3679a30a8148c88e14b2b27ba1c5a25709be059b7a7242a73ded3e
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:975f9705aa94b638ede9106ed3c06008531e4a181ba8af050ae7d4c527e99a9a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfd59dca009004df561617f8f6994512d029a952a68609cac24b36df5a0757ce
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d754412c61116546142914503e7369d0cc35d3c380a07e5218f595d76b6d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.3460100591182709,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.10785547366528851,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1089,6 +1089,364 @@
       "eval_samples_per_second": 38.782,
       "eval_steps_per_second": 19.391,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1112,12 +1470,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4684773605769216.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.340755432844162,
+  "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.14380729822038468,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 38.782,
       "eval_steps_per_second": 19.391,
       "step": 150
+    },
+    {
+      "epoch": 0.10857451015639044,
+      "grad_norm": 0.22903931140899658,
+      "learning_rate": 1.9136088935510362e-05,
+      "loss": 0.2524,
+      "step": 151
+    },
+    {
+      "epoch": 0.10929354664749236,
+      "grad_norm": 0.28338053822517395,
+      "learning_rate": 1.8414449687337464e-05,
+      "loss": 0.2875,
+      "step": 152
+    },
+    {
+      "epoch": 0.11001258313859429,
+      "grad_norm": 0.45356854796409607,
+      "learning_rate": 1.7703596875660645e-05,
+      "loss": 0.4064,
+      "step": 153
+    },
+    {
+      "epoch": 0.1107316196296962,
+      "grad_norm": 0.26118019223213196,
+      "learning_rate": 1.700377325606388e-05,
+      "loss": 0.2664,
+      "step": 154
+    },
+    {
+      "epoch": 0.11145065612079813,
+      "grad_norm": 0.33390459418296814,
+      "learning_rate": 1.631521781767214e-05,
+      "loss": 0.4033,
+      "step": 155
+    },
+    {
+      "epoch": 0.11216969261190006,
+      "grad_norm": 0.2763619124889374,
+      "learning_rate": 1.5638165701536868e-05,
+      "loss": 0.39,
+      "step": 156
+    },
+    {
+      "epoch": 0.11288872910300198,
+      "grad_norm": 0.31019601225852966,
+      "learning_rate": 1.4972848120335453e-05,
+      "loss": 0.4017,
+      "step": 157
+    },
+    {
+      "epoch": 0.1136077655941039,
+      "grad_norm": 0.28100889921188354,
+      "learning_rate": 1.4319492279412388e-05,
+      "loss": 0.3625,
+      "step": 158
+    },
+    {
+      "epoch": 0.11432680208520582,
+      "grad_norm": 0.324801504611969,
+      "learning_rate": 1.3678321299188801e-05,
+      "loss": 0.3951,
+      "step": 159
+    },
+    {
+      "epoch": 0.11504583857630775,
+      "grad_norm": 0.30081889033317566,
+      "learning_rate": 1.3049554138967051e-05,
+      "loss": 0.3298,
+      "step": 160
+    },
+    {
+      "epoch": 0.11576487506740966,
+      "grad_norm": 0.971464991569519,
+      "learning_rate": 1.2433405522156332e-05,
+      "loss": 0.5244,
+      "step": 161
+    },
+    {
+      "epoch": 0.1164839115585116,
+      "grad_norm": 0.2432282269001007,
+      "learning_rate": 1.183008586294485e-05,
+      "loss": 0.3023,
+      "step": 162
+    },
+    {
+      "epoch": 0.11720294804961352,
+      "grad_norm": 0.24197106063365936,
+      "learning_rate": 1.1239801194443506e-05,
+      "loss": 0.2985,
+      "step": 163
+    },
+    {
+      "epoch": 0.11792198454071544,
+      "grad_norm": 0.281246155500412,
+      "learning_rate": 1.066275309832584e-05,
+      "loss": 0.3222,
+      "step": 164
+    },
+    {
+      "epoch": 0.11864102103181737,
+      "grad_norm": 0.29184016585350037,
+      "learning_rate": 1.0099138635988026e-05,
+      "loss": 0.2759,
+      "step": 165
+    },
+    {
+      "epoch": 0.11936005752291928,
+      "grad_norm": 0.2676764130592346,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.2669,
+      "step": 166
+    },
+    {
+      "epoch": 0.12007909401402121,
+      "grad_norm": 0.2783859074115753,
+      "learning_rate": 9.012975854638949e-06,
+      "loss": 0.3013,
+      "step": 167
+    },
+    {
+      "epoch": 0.12079813050512314,
+      "grad_norm": 0.21228177845478058,
+      "learning_rate": 8.490798459222476e-06,
+      "loss": 0.2835,
+      "step": 168
+    },
+    {
+      "epoch": 0.12151716699622506,
+      "grad_norm": 0.22429366409778595,
+      "learning_rate": 7.982796418105371e-06,
+      "loss": 0.2829,
+      "step": 169
+    },
+    {
+      "epoch": 0.12223620348732699,
+      "grad_norm": 0.19282086193561554,
+      "learning_rate": 7.489143213519301e-06,
+      "loss": 0.3026,
+      "step": 170
+    },
+    {
+      "epoch": 0.1229552399784289,
+      "grad_norm": 0.20128233730793,
+      "learning_rate": 7.010007427581378e-06,
+      "loss": 0.3188,
+      "step": 171
+    },
+    {
+      "epoch": 0.12367427646953083,
+      "grad_norm": 0.20810480415821075,
+      "learning_rate": 6.5455526847235825e-06,
+      "loss": 0.3205,
+      "step": 172
+    },
+    {
+      "epoch": 0.12439331296063275,
+      "grad_norm": 0.16887664794921875,
+      "learning_rate": 6.0959375958151045e-06,
+      "loss": 0.3056,
+      "step": 173
+    },
+    {
+      "epoch": 0.1251123494517347,
+      "grad_norm": 0.17027315497398376,
+      "learning_rate": 5.6613157039969055e-06,
+      "loss": 0.3144,
+      "step": 174
+    },
+    {
+      "epoch": 0.1258313859428366,
+      "grad_norm": 0.1935795694589615,
+      "learning_rate": 5.241835432246889e-06,
+      "loss": 0.3328,
+      "step": 175
+    },
+    {
+      "epoch": 0.12655042243393852,
+      "grad_norm": 0.1907937079668045,
+      "learning_rate": 4.837640032693558e-06,
+      "loss": 0.3334,
+      "step": 176
+    },
+    {
+      "epoch": 0.12726945892504044,
+      "grad_norm": 0.19159474968910217,
+      "learning_rate": 4.448867537695578e-06,
+      "loss": 0.3548,
+      "step": 177
+    },
+    {
+      "epoch": 0.12798849541614238,
+      "grad_norm": 0.18230465054512024,
+      "learning_rate": 4.075650712703849e-06,
+      "loss": 0.3516,
+      "step": 178
+    },
+    {
+      "epoch": 0.1287075319072443,
+      "grad_norm": 0.18190757930278778,
+      "learning_rate": 3.71811701092219e-06,
+      "loss": 0.3495,
+      "step": 179
+    },
+    {
+      "epoch": 0.1294265683983462,
+      "grad_norm": 0.15216723084449768,
+      "learning_rate": 3.376388529782215e-06,
+      "loss": 0.3314,
+      "step": 180
+    },
+    {
+      "epoch": 0.13014560488944815,
+      "grad_norm": 0.1875065267086029,
+      "learning_rate": 3.0505819692471792e-06,
+      "loss": 0.376,
+      "step": 181
+    },
+    {
+      "epoch": 0.13086464138055007,
+      "grad_norm": 0.15999597311019897,
+      "learning_rate": 2.7408085919590264e-06,
+      "loss": 0.3553,
+      "step": 182
+    },
+    {
+      "epoch": 0.13158367787165198,
+      "grad_norm": 0.19826200604438782,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.3845,
+      "step": 183
+    },
+    {
+      "epoch": 0.1323027143627539,
+      "grad_norm": 0.20129786431789398,
+      "learning_rate": 2.1697790249779636e-06,
+      "loss": 0.3787,
+      "step": 184
+    },
+    {
+      "epoch": 0.13302175085385584,
+      "grad_norm": 0.17354756593704224,
+      "learning_rate": 1.908717841359048e-06,
+      "loss": 0.3447,
+      "step": 185
+    },
+    {
+      "epoch": 0.13374078734495776,
+      "grad_norm": 0.18118330836296082,
+      "learning_rate": 1.6640797865406288e-06,
+      "loss": 0.3601,
+      "step": 186
+    },
+    {
+      "epoch": 0.13445982383605967,
+      "grad_norm": 0.18639330565929413,
+      "learning_rate": 1.4359484041943038e-06,
+      "loss": 0.3319,
+      "step": 187
+    },
+    {
+      "epoch": 0.13517886032716162,
+      "grad_norm": 0.20950943231582642,
+      "learning_rate": 1.2244016009781701e-06,
+      "loss": 0.3488,
+      "step": 188
+    },
+    {
+      "epoch": 0.13589789681826353,
+      "grad_norm": 0.2170008271932602,
+      "learning_rate": 1.0295116199317057e-06,
+      "loss": 0.3247,
+      "step": 189
+    },
+    {
+      "epoch": 0.13661693330936545,
+      "grad_norm": 0.2157934308052063,
+      "learning_rate": 8.513450158049108e-07,
+      "loss": 0.3485,
+      "step": 190
+    },
+    {
+      "epoch": 0.13733596980046736,
+      "grad_norm": 0.2358957827091217,
+      "learning_rate": 6.899626323298713e-07,
+      "loss": 0.3492,
+      "step": 191
+    },
+    {
+      "epoch": 0.1380550062915693,
+      "grad_norm": 0.19919978082180023,
+      "learning_rate": 5.454195814427021e-07,
+      "loss": 0.3563,
+      "step": 192
+    },
+    {
+      "epoch": 0.13877404278267122,
+      "grad_norm": 0.2569163143634796,
+      "learning_rate": 4.177652244628627e-07,
+      "loss": 0.3351,
+      "step": 193
+    },
+    {
+      "epoch": 0.13949307927377314,
+      "grad_norm": 0.23750156164169312,
+      "learning_rate": 3.0704315523631953e-07,
+      "loss": 0.331,
+      "step": 194
+    },
+    {
+      "epoch": 0.14021211576487508,
+      "grad_norm": 0.2309463918209076,
+      "learning_rate": 2.1329118524827662e-07,
+      "loss": 0.3331,
+      "step": 195
+    },
+    {
+      "epoch": 0.140931152255977,
+      "grad_norm": 0.20305994153022766,
+      "learning_rate": 1.3654133071059893e-07,
+      "loss": 0.2941,
+      "step": 196
+    },
+    {
+      "epoch": 0.1416501887470789,
+      "grad_norm": 0.22489352524280548,
+      "learning_rate": 7.681980162830282e-08,
+      "loss": 0.299,
+      "step": 197
+    },
+    {
+      "epoch": 0.14236922523818082,
+      "grad_norm": 0.24879539012908936,
+      "learning_rate": 3.4146992848854695e-08,
+      "loss": 0.2987,
+      "step": 198
+    },
+    {
+      "epoch": 0.14308826172928277,
+      "grad_norm": 0.2867515981197357,
+      "learning_rate": 8.537477097364522e-09,
+      "loss": 0.2943,
+      "step": 199
+    },
+    {
+      "epoch": 0.14380729822038468,
+      "grad_norm": 0.36997130513191223,
+      "learning_rate": 0.0,
+      "loss": 0.2709,
+      "step": 200
+    },
+    {
+      "epoch": 0.14380729822038468,
+      "eval_loss": 0.340755432844162,
+      "eval_runtime": 60.0302,
+      "eval_samples_per_second": 39.014,
+      "eval_steps_per_second": 19.507,
+      "step": 200
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 6229883634057216.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null