Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +371 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f0929439cfbd351d2e41c2f9df7753fa93beba10f033796889afa34d8a100f91
 size 26008

 version https://git-lfs.github.com/spec/v1
+oid sha256:e640d3fe8ccac74d7dd5f59a4f540cbd2816fc170518bbcfe2cee5b22df0a709
 size 26008

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:14f48fc54a39ff7abf993311580fcc151a65a20c34de3722473f6bd848a5d210
 size 68874

 version https://git-lfs.github.com/spec/v1
+oid sha256:b7c2d0bb77b118fd3dcc6871612bb4b5c1e6df9a257657683b8ec7c894066b8e
 size 68874

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b734b05960d846435c1c04efaf275e1e0472f26df361c349e90d725168804580
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:95a86146ec46b696257dcafe77469077b2b6354f112fa62cf4506f458b954a6c
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ad2841b888ce0ae948634757c3fcacf0119c249e0fec8f3ca61ea266369ef92
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c5d2a6c6aafc669cea03b9634666f204de949a3d45ce2f48a07e7e3eaf18c715
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.926169395446777,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.020713046630246226,
   "eval_steps": 25,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -381,6 +381,372 @@
       "eval_samples_per_second": 204.526,
       "eval_steps_per_second": 28.634,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -409,7 +775,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 79631155200.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 11.915943145751953,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.04142609326049245,
   "eval_steps": 25,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 204.526,
       "eval_steps_per_second": 28.634,
       "step": 50
+    },
+    {
+      "epoch": 0.021127307562851153,
+      "grad_norm": 0.06076189503073692,
+      "learning_rate": 0.0002668315918143169,
+      "loss": 11.9242,
+      "step": 51
+    },
+    {
+      "epoch": 0.021541568495456076,
+      "grad_norm": 0.0622175894677639,
+      "learning_rate": 0.00026526016662852886,
+      "loss": 11.9228,
+      "step": 52
+    },
+    {
+      "epoch": 0.021955829428061,
+      "grad_norm": 0.05812002345919609,
+      "learning_rate": 0.00026365723046405023,
+      "loss": 11.9241,
+      "step": 53
+    },
+    {
+      "epoch": 0.022370090360665923,
+      "grad_norm": 0.05627156049013138,
+      "learning_rate": 0.0002620232215476231,
+      "loss": 11.9206,
+      "step": 54
+    },
+    {
+      "epoch": 0.02278435129327085,
+      "grad_norm": 0.04959167540073395,
+      "learning_rate": 0.0002603585866009697,
+      "loss": 11.9216,
+      "step": 55
+    },
+    {
+      "epoch": 0.023198612225875773,
+      "grad_norm": 0.05402550846338272,
+      "learning_rate": 0.00025866378071866334,
+      "loss": 11.9216,
+      "step": 56
+    },
+    {
+      "epoch": 0.023612873158480697,
+      "grad_norm": 0.049663037061691284,
+      "learning_rate": 0.00025693926724370956,
+      "loss": 11.9207,
+      "step": 57
+    },
+    {
+      "epoch": 0.024027134091085624,
+      "grad_norm": 0.045585885643959045,
+      "learning_rate": 0.00025518551764087326,
+      "loss": 11.9232,
+      "step": 58
+    },
+    {
+      "epoch": 0.024441395023690547,
+      "grad_norm": 0.05635461211204529,
+      "learning_rate": 0.00025340301136778483,
+      "loss": 11.922,
+      "step": 59
+    },
+    {
+      "epoch": 0.02485565595629547,
+      "grad_norm": 0.05952185392379761,
+      "learning_rate": 0.00025159223574386114,
+      "loss": 11.9209,
+      "step": 60
+    },
+    {
+      "epoch": 0.025269916888900398,
+      "grad_norm": 0.04048493504524231,
+      "learning_rate": 0.0002497536858170772,
+      "loss": 11.9213,
+      "step": 61
+    },
+    {
+      "epoch": 0.02568417782150532,
+      "grad_norm": 0.050521329045295715,
+      "learning_rate": 0.00024788786422862526,
+      "loss": 11.9192,
+      "step": 62
+    },
+    {
+      "epoch": 0.026098438754110245,
+      "grad_norm": 0.043353911489248276,
+      "learning_rate": 0.00024599528107549745,
+      "loss": 11.9202,
+      "step": 63
+    },
+    {
+      "epoch": 0.026512699686715168,
+      "grad_norm": 0.0434187576174736,
+      "learning_rate": 0.00024407645377103054,
+      "loss": 11.9171,
+      "step": 64
+    },
+    {
+      "epoch": 0.026926960619320095,
+      "grad_norm": 0.0419292189180851,
+      "learning_rate": 0.00024213190690345018,
+      "loss": 11.9196,
+      "step": 65
+    },
+    {
+      "epoch": 0.02734122155192502,
+      "grad_norm": 0.04601728916168213,
+      "learning_rate": 0.00024016217209245374,
+      "loss": 11.9174,
+      "step": 66
+    },
+    {
+      "epoch": 0.027755482484529942,
+      "grad_norm": 0.047061558812856674,
+      "learning_rate": 0.00023816778784387094,
+      "loss": 11.9189,
+      "step": 67
+    },
+    {
+      "epoch": 0.02816974341713487,
+      "grad_norm": 0.039436087012290955,
+      "learning_rate": 0.0002361492994024415,
+      "loss": 11.9178,
+      "step": 68
+    },
+    {
+      "epoch": 0.028584004349739792,
+      "grad_norm": 0.03027687780559063,
+      "learning_rate": 0.0002341072586027509,
+      "loss": 11.9175,
+      "step": 69
+    },
+    {
+      "epoch": 0.028998265282344716,
+      "grad_norm": 0.03565209358930588,
+      "learning_rate": 0.00023204222371836405,
+      "loss": 11.9168,
+      "step": 70
+    },
+    {
+      "epoch": 0.029412526214949643,
+      "grad_norm": 0.02530493400990963,
+      "learning_rate": 0.00022995475930919905,
+      "loss": 11.9163,
+      "step": 71
+    },
+    {
+      "epoch": 0.029826787147554566,
+      "grad_norm": 0.0354970321059227,
+      "learning_rate": 0.00022784543606718227,
+      "loss": 11.9154,
+      "step": 72
+    },
+    {
+      "epoch": 0.03024104808015949,
+      "grad_norm": 0.018435046076774597,
+      "learning_rate": 0.00022571483066022657,
+      "loss": 11.9158,
+      "step": 73
+    },
+    {
+      "epoch": 0.030655309012764417,
+      "grad_norm": 0.02600877545773983,
+      "learning_rate": 0.0002235635255745762,
+      "loss": 11.9121,
+      "step": 74
+    },
+    {
+      "epoch": 0.03106956994536934,
+      "grad_norm": 0.032639019191265106,
+      "learning_rate": 0.00022139210895556104,
+      "loss": 11.9143,
+      "step": 75
+    },
+    {
+      "epoch": 0.03106956994536934,
+      "eval_loss": 11.916912078857422,
+      "eval_runtime": 0.2447,
+      "eval_samples_per_second": 204.352,
+      "eval_steps_per_second": 28.609,
+      "step": 75
+    },
+    {
+      "epoch": 0.03148383087797427,
+      "grad_norm": 0.026764797046780586,
+      "learning_rate": 0.00021920117444680317,
+      "loss": 11.9196,
+      "step": 76
+    },
+    {
+      "epoch": 0.03189809181057919,
+      "grad_norm": 0.0315483957529068,
+      "learning_rate": 0.00021699132102792097,
+      "loss": 11.9159,
+      "step": 77
+    },
+    {
+      "epoch": 0.032312352743184114,
+      "grad_norm": 0.029291000217199326,
+      "learning_rate": 0.0002147631528507739,
+      "loss": 11.9156,
+      "step": 78
+    },
+    {
+      "epoch": 0.03272661367578904,
+      "grad_norm": 0.02186393365263939,
+      "learning_rate": 0.00021251727907429355,
+      "loss": 11.9126,
+      "step": 79
+    },
+    {
+      "epoch": 0.03314087460839396,
+      "grad_norm": 0.02193310484290123,
+      "learning_rate": 0.0002102543136979454,
+      "loss": 11.9144,
+      "step": 80
+    },
+    {
+      "epoch": 0.033555135540998884,
+      "grad_norm": 0.027057521045207977,
+      "learning_rate": 0.0002079748753938678,
+      "loss": 11.9136,
+      "step": 81
+    },
+    {
+      "epoch": 0.03396939647360381,
+      "grad_norm": 0.028718745335936546,
+      "learning_rate": 0.0002056795873377331,
+      "loss": 11.9155,
+      "step": 82
+    },
+    {
+      "epoch": 0.03438365740620874,
+      "grad_norm": 0.03541611507534981,
+      "learning_rate": 0.00020336907703837748,
+      "loss": 11.9161,
+      "step": 83
+    },
+    {
+      "epoch": 0.03479791833881366,
+      "grad_norm": 0.027952907606959343,
+      "learning_rate": 0.00020104397616624645,
+      "loss": 11.9165,
+      "step": 84
+    },
+    {
+      "epoch": 0.035212179271418585,
+      "grad_norm": 0.02944616600871086,
+      "learning_rate": 0.00019870492038070252,
+      "loss": 11.9162,
+      "step": 85
+    },
+    {
+      "epoch": 0.03562644020402351,
+      "grad_norm": 0.028091363608837128,
+      "learning_rate": 0.0001963525491562421,
+      "loss": 11.9145,
+      "step": 86
+    },
+    {
+      "epoch": 0.03604070113662843,
+      "grad_norm": 0.01949743553996086,
+      "learning_rate": 0.0001939875056076697,
+      "loss": 11.9126,
+      "step": 87
+    },
+    {
+      "epoch": 0.036454962069233356,
+      "grad_norm": 0.03287213295698166,
+      "learning_rate": 0.00019161043631427666,
+      "loss": 11.9162,
+      "step": 88
+    },
+    {
+      "epoch": 0.036869223001838286,
+      "grad_norm": 0.024355346336960793,
+      "learning_rate": 0.00018922199114307294,
+      "loss": 11.9142,
+      "step": 89
+    },
+    {
+      "epoch": 0.03728348393444321,
+      "grad_norm": 0.02432161383330822,
+      "learning_rate": 0.00018682282307111987,
+      "loss": 11.9123,
+      "step": 90
+    },
+    {
+      "epoch": 0.03769774486704813,
+      "grad_norm": 0.02186986431479454,
+      "learning_rate": 0.00018441358800701273,
+      "loss": 11.9155,
+      "step": 91
+    },
+    {
+      "epoch": 0.038112005799653056,
+      "grad_norm": 0.02595347724854946,
+      "learning_rate": 0.00018199494461156203,
+      "loss": 11.9145,
+      "step": 92
+    },
+    {
+      "epoch": 0.03852626673225798,
+      "grad_norm": 0.01945437304675579,
+      "learning_rate": 0.000179567554117722,
+      "loss": 11.9142,
+      "step": 93
+    },
+    {
+      "epoch": 0.0389405276648629,
+      "grad_norm": 0.034285496920347214,
+      "learning_rate": 0.00017713208014981648,
+      "loss": 11.9142,
+      "step": 94
+    },
+    {
+      "epoch": 0.03935478859746783,
+      "grad_norm": 0.019000215455889702,
+      "learning_rate": 0.00017468918854211007,
+      "loss": 11.9144,
+      "step": 95
+    },
+    {
+      "epoch": 0.03976904953007276,
+      "grad_norm": 0.027748456224799156,
+      "learning_rate": 0.00017223954715677627,
+      "loss": 11.9086,
+      "step": 96
+    },
+    {
+      "epoch": 0.04018331046267768,
+      "grad_norm": 0.02137632481753826,
+      "learning_rate": 0.00016978382570131034,
+      "loss": 11.9144,
+      "step": 97
+    },
+    {
+      "epoch": 0.040597571395282604,
+      "grad_norm": 0.02740846946835518,
+      "learning_rate": 0.00016732269554543794,
+      "loss": 11.9143,
+      "step": 98
+    },
+    {
+      "epoch": 0.04101183232788753,
+      "grad_norm": 0.02057352475821972,
+      "learning_rate": 0.00016485682953756942,
+      "loss": 11.912,
+      "step": 99
+    },
+    {
+      "epoch": 0.04142609326049245,
+      "grad_norm": 0.036128297448158264,
+      "learning_rate": 0.00016238690182084986,
+      "loss": 11.9097,
+      "step": 100
+    },
+    {
+      "epoch": 0.04142609326049245,
+      "eval_loss": 11.915943145751953,
+      "eval_runtime": 0.2452,
+      "eval_samples_per_second": 203.942,
+      "eval_steps_per_second": 28.552,
+      "step": 100
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 159262310400.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null