Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +364 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:faba036e91248b2cce80b7ea70b8ec8ff876fd85373eed100e3d0d03d1824baa
 size 13587864

 version https://git-lfs.github.com/spec/v1
+oid sha256:614964fabe82285d67a51a3d40e562e051392eccbb748842a3cbc17bf63569f7
 size 13587864

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:22aa3de46d21802c916055442529dac6a95f2c51fd429005d807d1bc774d0511
 size 27273018

 version https://git-lfs.github.com/spec/v1
+oid sha256:9c008826cde0d895c1c4107fd9f3e5767bb2bda3ae633c3b917ef18cb6f7b587
 size 27273018

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7fbdfebe7d519c456ce3866535d28c2b7cb1a1804711d82e33d7a5856bcf7b47
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:5523777bc41848d4797ac61c85e11d1b21d16faae2065e567f3f00076a1372d8
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a89ffc445067fef9d6d02bb3ff9e61d5e3209e6fa67c7259b3b364b90dbaa2cd
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:49d60a69e2379be2053e816cbaff31e6c931b5922dd86c71c9eaf473299cbf62
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.4491695165634155,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.0427715996578272,
   "eval_steps": 50,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -373,6 +373,364 @@
       "eval_samples_per_second": 133.755,
       "eval_steps_per_second": 33.541,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -396,12 +754,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 770548602765312.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.874006450176239,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.0855431993156544,
   "eval_steps": 50,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 133.755,
       "eval_steps_per_second": 33.541,
       "step": 50
+    },
+    {
+      "epoch": 0.04362703165098375,
+      "grad_norm": 43.02643966674805,
+      "learning_rate": 5.695865504800327e-05,
+      "loss": 9.5889,
+      "step": 51
+    },
+    {
+      "epoch": 0.04448246364414029,
+      "grad_norm": 44.06403350830078,
+      "learning_rate": 5.522642316338268e-05,
+      "loss": 9.5999,
+      "step": 52
+    },
+    {
+      "epoch": 0.045337895637296836,
+      "grad_norm": 31.32398223876953,
+      "learning_rate": 5.348782368720626e-05,
+      "loss": 7.1508,
+      "step": 53
+    },
+    {
+      "epoch": 0.046193327630453376,
+      "grad_norm": 17.837383270263672,
+      "learning_rate": 5.174497483512506e-05,
+      "loss": 2.9384,
+      "step": 54
+    },
+    {
+      "epoch": 0.04704875962360992,
+      "grad_norm": 7.213578701019287,
+      "learning_rate": 5e-05,
+      "loss": 0.9618,
+      "step": 55
+    },
+    {
+      "epoch": 0.04790419161676647,
+      "grad_norm": 5.207120418548584,
+      "learning_rate": 4.825502516487497e-05,
+      "loss": 0.7529,
+      "step": 56
+    },
+    {
+      "epoch": 0.04875962360992301,
+      "grad_norm": 4.69100284576416,
+      "learning_rate": 4.6512176312793736e-05,
+      "loss": 0.6933,
+      "step": 57
+    },
+    {
+      "epoch": 0.04961505560307956,
+      "grad_norm": 2.485616683959961,
+      "learning_rate": 4.477357683661734e-05,
+      "loss": 0.4986,
+      "step": 58
+    },
+    {
+      "epoch": 0.0504704875962361,
+      "grad_norm": 1.7521893978118896,
+      "learning_rate": 4.3041344951996746e-05,
+      "loss": 0.382,
+      "step": 59
+    },
+    {
+      "epoch": 0.05132591958939264,
+      "grad_norm": 1.4033925533294678,
+      "learning_rate": 4.131759111665349e-05,
+      "loss": 0.343,
+      "step": 60
+    },
+    {
+      "epoch": 0.05218135158254919,
+      "grad_norm": 1.392710566520691,
+      "learning_rate": 3.960441545911204e-05,
+      "loss": 0.3722,
+      "step": 61
+    },
+    {
+      "epoch": 0.05303678357570573,
+      "grad_norm": 1.1755892038345337,
+      "learning_rate": 3.790390522001662e-05,
+      "loss": 0.3236,
+      "step": 62
+    },
+    {
+      "epoch": 0.05389221556886228,
+      "grad_norm": 0.8601241707801819,
+      "learning_rate": 3.6218132209150045e-05,
+      "loss": 0.2427,
+      "step": 63
+    },
+    {
+      "epoch": 0.05474764756201882,
+      "grad_norm": 0.9155849814414978,
+      "learning_rate": 3.4549150281252636e-05,
+      "loss": 0.3333,
+      "step": 64
+    },
+    {
+      "epoch": 0.055603079555175364,
+      "grad_norm": 0.8021154403686523,
+      "learning_rate": 3.289899283371657e-05,
+      "loss": 0.2494,
+      "step": 65
+    },
+    {
+      "epoch": 0.05645851154833191,
+      "grad_norm": 0.8067910075187683,
+      "learning_rate": 3.12696703292044e-05,
+      "loss": 0.2427,
+      "step": 66
+    },
+    {
+      "epoch": 0.05731394354148845,
+      "grad_norm": 0.6124118566513062,
+      "learning_rate": 2.9663167846209998e-05,
+      "loss": 0.2362,
+      "step": 67
+    },
+    {
+      "epoch": 0.058169375534645,
+      "grad_norm": 0.7200761437416077,
+      "learning_rate": 2.8081442660546125e-05,
+      "loss": 0.2506,
+      "step": 68
+    },
+    {
+      "epoch": 0.05902480752780154,
+      "grad_norm": 0.6140468120574951,
+      "learning_rate": 2.6526421860705473e-05,
+      "loss": 0.242,
+      "step": 69
+    },
+    {
+      "epoch": 0.059880239520958084,
+      "grad_norm": 0.8597086071968079,
+      "learning_rate": 2.500000000000001e-05,
+      "loss": 0.3553,
+      "step": 70
+    },
+    {
+      "epoch": 0.06073567151411463,
+      "grad_norm": 0.8153907656669617,
+      "learning_rate": 2.350403678833976e-05,
+      "loss": 0.2191,
+      "step": 71
+    },
+    {
+      "epoch": 0.06159110350727117,
+      "grad_norm": 0.5583605766296387,
+      "learning_rate": 2.2040354826462668e-05,
+      "loss": 0.2033,
+      "step": 72
+    },
+    {
+      "epoch": 0.06244653550042772,
+      "grad_norm": 0.5807018876075745,
+      "learning_rate": 2.061073738537635e-05,
+      "loss": 0.2504,
+      "step": 73
+    },
+    {
+      "epoch": 0.06330196749358426,
+      "grad_norm": 0.563032865524292,
+      "learning_rate": 1.9216926233717085e-05,
+      "loss": 0.2386,
+      "step": 74
+    },
+    {
+      "epoch": 0.0641573994867408,
+      "grad_norm": 0.4548931121826172,
+      "learning_rate": 1.7860619515673033e-05,
+      "loss": 0.2231,
+      "step": 75
+    },
+    {
+      "epoch": 0.06501283147989735,
+      "grad_norm": 0.545421838760376,
+      "learning_rate": 1.6543469682057106e-05,
+      "loss": 0.2811,
+      "step": 76
+    },
+    {
+      "epoch": 0.0658682634730539,
+      "grad_norm": 0.8195017576217651,
+      "learning_rate": 1.526708147705013e-05,
+      "loss": 0.2811,
+      "step": 77
+    },
+    {
+      "epoch": 0.06672369546621043,
+      "grad_norm": 0.7224476337432861,
+      "learning_rate": 1.4033009983067452e-05,
+      "loss": 0.3034,
+      "step": 78
+    },
+    {
+      "epoch": 0.06757912745936698,
+      "grad_norm": 0.738802433013916,
+      "learning_rate": 1.2842758726130283e-05,
+      "loss": 0.3257,
+      "step": 79
+    },
+    {
+      "epoch": 0.06843455945252352,
+      "grad_norm": 0.7129746079444885,
+      "learning_rate": 1.1697777844051105e-05,
+      "loss": 0.3329,
+      "step": 80
+    },
+    {
+      "epoch": 0.06928999144568007,
+      "grad_norm": 0.7020834684371948,
+      "learning_rate": 1.0599462319663905e-05,
+      "loss": 0.307,
+      "step": 81
+    },
+    {
+      "epoch": 0.07014542343883662,
+      "grad_norm": 0.5662789940834045,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 0.3007,
+      "step": 82
+    },
+    {
+      "epoch": 0.07100085543199315,
+      "grad_norm": 0.6873999238014221,
+      "learning_rate": 8.548121372247918e-06,
+      "loss": 0.34,
+      "step": 83
+    },
+    {
+      "epoch": 0.0718562874251497,
+      "grad_norm": 0.7224177718162537,
+      "learning_rate": 7.597595192178702e-06,
+      "loss": 0.3833,
+      "step": 84
+    },
+    {
+      "epoch": 0.07271171941830624,
+      "grad_norm": 1.1468607187271118,
+      "learning_rate": 6.698729810778065e-06,
+      "loss": 0.3873,
+      "step": 85
+    },
+    {
+      "epoch": 0.07356715141146279,
+      "grad_norm": 0.7172346115112305,
+      "learning_rate": 5.852620357053651e-06,
+      "loss": 0.3333,
+      "step": 86
+    },
+    {
+      "epoch": 0.07442258340461934,
+      "grad_norm": 0.7859193086624146,
+      "learning_rate": 5.060297685041659e-06,
+      "loss": 0.4443,
+      "step": 87
+    },
+    {
+      "epoch": 0.07527801539777587,
+      "grad_norm": 0.7493287324905396,
+      "learning_rate": 4.322727117869951e-06,
+      "loss": 0.3925,
+      "step": 88
+    },
+    {
+      "epoch": 0.07613344739093242,
+      "grad_norm": 1.187333106994629,
+      "learning_rate": 3.6408072716606346e-06,
+      "loss": 0.5375,
+      "step": 89
+    },
+    {
+      "epoch": 0.07698887938408897,
+      "grad_norm": 1.1516120433807373,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 0.5597,
+      "step": 90
+    },
+    {
+      "epoch": 0.07784431137724551,
+      "grad_norm": 1.370631217956543,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 0.6242,
+      "step": 91
+    },
+    {
+      "epoch": 0.07869974337040206,
+      "grad_norm": 1.2987772226333618,
+      "learning_rate": 1.9369152030840556e-06,
+      "loss": 0.6291,
+      "step": 92
+    },
+    {
+      "epoch": 0.07955517536355859,
+      "grad_norm": 1.182003378868103,
+      "learning_rate": 1.4852136862001764e-06,
+      "loss": 0.7286,
+      "step": 93
+    },
+    {
+      "epoch": 0.08041060735671514,
+      "grad_norm": 2.1859383583068848,
+      "learning_rate": 1.0926199633097157e-06,
+      "loss": 0.7186,
+      "step": 94
+    },
+    {
+      "epoch": 0.08126603934987169,
+      "grad_norm": 2.0418176651000977,
+      "learning_rate": 7.596123493895991e-07,
+      "loss": 0.7344,
+      "step": 95
+    },
+    {
+      "epoch": 0.08212147134302823,
+      "grad_norm": 1.459978461265564,
+      "learning_rate": 4.865965629214819e-07,
+      "loss": 0.7504,
+      "step": 96
+    },
+    {
+      "epoch": 0.08297690333618478,
+      "grad_norm": 2.8905394077301025,
+      "learning_rate": 2.7390523158633554e-07,
+      "loss": 0.9282,
+      "step": 97
+    },
+    {
+      "epoch": 0.08383233532934131,
+      "grad_norm": 2.008319854736328,
+      "learning_rate": 1.2179748700879012e-07,
+      "loss": 1.0452,
+      "step": 98
+    },
+    {
+      "epoch": 0.08468776732249786,
+      "grad_norm": 2.0289366245269775,
+      "learning_rate": 3.04586490452119e-08,
+      "loss": 1.0555,
+      "step": 99
+    },
+    {
+      "epoch": 0.0855431993156544,
+      "grad_norm": 5.0649800300598145,
+      "learning_rate": 0.0,
+      "loss": 1.9191,
+      "step": 100
+    },
+    {
+      "epoch": 0.0855431993156544,
+      "eval_loss": 0.874006450176239,
+      "eval_runtime": 7.3781,
+      "eval_samples_per_second": 133.503,
+      "eval_steps_per_second": 33.477,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1537626265878528.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null