Training in progress, step 25, checkpoint

Browse files

Files changed (10) hide show

last-checkpoint/adapter_config.json +5 -5
last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +59 -608
last-checkpoint/training_args.bin +1 -1

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "down_proj",
-    "o_proj",
     "gate_proj",
-    "k_proj",
     "v_proj",
-    "up_proj",
-    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "up_proj",
     "gate_proj",
+    "o_proj",
     "v_proj",
+    "k_proj",
+    "q_proj",
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e07b5035ffb180931a60c0f114ef7216a354c482f95cd25ca93960a8ebf1c535
 size 323014168

 version https://git-lfs.github.com/spec/v1
+oid sha256:a71533fe03d6d5298967c52fe8a3adcd9f58f9bfde5ea1ca92fded50af052345
 size 323014168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c3b5f5bb59cebe737dc3be6e482be6849e6e1825609a4a24df7ab1326e900710
 size 646253418

 version https://git-lfs.github.com/spec/v1
+oid sha256:97709bd8d8c4f3d5fbc7ea6c9b936a9128092e61fc480fb684fc8b8aa374bc4a
 size 646253418

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:af25ef66e3578b088787098691ff18f3d160f468c01678789a770159b408e9ff
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:5b6d31cc46b3fdac785b4e257da9a155cd9ffc56f4117a3380bad48dd22d6e21
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3cb1a771d842cefd502b9e7b043d798857a051d7ea04a1aca476720f6bce1e4
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:d95845af24bb237001e703b7e7c7500ff206982797e8188e4d4583e54688da29
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5c5fc06ca6e201f6cf92a9a131de3c7777a41831442b3ee77f257c6e125e5136
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:0a9523f33dac34eaa51608e01e3a143afa57e5b297a1d3e88c82c8eac8e33a19
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ce1df0fc9a8b7b175429da9e9454ce253fb484f5186e19d58b68bb4f04eaed68
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:37851911ccb33a0de34736e35c192d969c66ca74673f9cfdb04e6055cfc12252
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:051dee7dfbeecb34b46e8409ffafec324501f465585234624669bc8c9e863ae4
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:107607e336ff02e6e3fc135f6b55fc089901c7172808564e7046a8e836a537c9
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,16 +1,16 @@
 {
-  "best_metric": 0.7247900366783142,
-  "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.03545392098207361,
   "eval_steps": 25,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.0003545392098207361,
-      "grad_norm": 4.592109680175781,
       "learning_rate": 2e-05,
       "loss": 5.0876,
       "step": 1
@@ -18,735 +18,186 @@
     {
       "epoch": 0.0003545392098207361,
       "eval_loss": 6.216423511505127,
-      "eval_runtime": 1.4239,
-      "eval_samples_per_second": 35.114,
-      "eval_steps_per_second": 9.13,
       "step": 1
     },
     {
       "epoch": 0.0007090784196414722,
-      "grad_norm": 4.908637523651123,
       "learning_rate": 4e-05,
       "loss": 4.9835,
       "step": 2
     },
     {
       "epoch": 0.0010636176294622083,
-      "grad_norm": 5.108648777008057,
       "learning_rate": 6e-05,
-      "loss": 5.0016,
       "step": 3
     },
     {
       "epoch": 0.0014181568392829444,
-      "grad_norm": 5.53975248336792,
       "learning_rate": 8e-05,
-      "loss": 4.8283,
       "step": 4
     },
     {
       "epoch": 0.0017726960491036806,
-      "grad_norm": 5.778749465942383,
       "learning_rate": 0.0001,
-      "loss": 3.61,
       "step": 5
     },
     {
       "epoch": 0.0021272352589244165,
-      "grad_norm": 5.146139144897461,
       "learning_rate": 9.997539658034168e-05,
-      "loss": 2.5626,
       "step": 6
     },
     {
       "epoch": 0.002481774468745153,
-      "grad_norm": 3.335517644882202,
       "learning_rate": 9.990161322484486e-05,
-      "loss": 1.6114,
       "step": 7
     },
     {
       "epoch": 0.002836313678565889,
-      "grad_norm": 1.6765196323394775,
       "learning_rate": 9.977873061452552e-05,
-      "loss": 1.2627,
       "step": 8
     },
     {
       "epoch": 0.003190852888386625,
-      "grad_norm": 1.5829001665115356,
       "learning_rate": 9.96068831197139e-05,
-      "loss": 1.0987,
       "step": 9
     },
     {
       "epoch": 0.003545392098207361,
-      "grad_norm": 2.0118753910064697,
       "learning_rate": 9.938625865312251e-05,
-      "loss": 0.8517,
       "step": 10
     },
     {
       "epoch": 0.003899931308028097,
-      "grad_norm": 2.000054121017456,
       "learning_rate": 9.911709846436641e-05,
-      "loss": 0.7161,
       "step": 11
     },
     {
       "epoch": 0.004254470517848833,
-      "grad_norm": 1.3442065715789795,
       "learning_rate": 9.879969687616027e-05,
-      "loss": 0.7239,
       "step": 12
     },
     {
       "epoch": 0.004609009727669569,
-      "grad_norm": 1.3330413103103638,
       "learning_rate": 9.84344009624807e-05,
-      "loss": 0.8929,
       "step": 13
     },
     {
       "epoch": 0.004963548937490306,
-      "grad_norm": 1.584913969039917,
       "learning_rate": 9.80216101690461e-05,
-      "loss": 1.193,
       "step": 14
     },
     {
       "epoch": 0.005318088147311041,
-      "grad_norm": 0.9643686413764954,
       "learning_rate": 9.756177587652856e-05,
-      "loss": 1.0059,
       "step": 15
     },
     {
       "epoch": 0.005672627357131778,
-      "grad_norm": 1.0433870553970337,
       "learning_rate": 9.705540090697575e-05,
       "loss": 1.1119,
       "step": 16
     },
     {
       "epoch": 0.006027166566952514,
-      "grad_norm": 1.0804919004440308,
       "learning_rate": 9.650303897398232e-05,
-      "loss": 0.8664,
       "step": 17
     },
     {
       "epoch": 0.00638170577677325,
-      "grad_norm": 0.9156429767608643,
       "learning_rate": 9.590529407721231e-05,
-      "loss": 0.8146,
       "step": 18
     },
     {
       "epoch": 0.006736244986593986,
-      "grad_norm": 0.7510998845100403,
       "learning_rate": 9.526281984193436e-05,
-      "loss": 0.7862,
       "step": 19
     },
     {
       "epoch": 0.007090784196414722,
-      "grad_norm": 0.8360267877578735,
       "learning_rate": 9.4576318804292e-05,
-      "loss": 0.7927,
       "step": 20
     },
     {
       "epoch": 0.007445323406235459,
-      "grad_norm": 0.7520768046379089,
       "learning_rate": 9.384654164309083e-05,
       "loss": 0.6681,
       "step": 21
     },
     {
       "epoch": 0.007799862616056194,
-      "grad_norm": 0.5856723189353943,
       "learning_rate": 9.30742863589421e-05,
-      "loss": 0.6741,
       "step": 22
     },
     {
       "epoch": 0.008154401825876931,
-      "grad_norm": 0.716020405292511,
       "learning_rate": 9.226039740166091e-05,
-      "loss": 0.6866,
       "step": 23
     },
     {
       "epoch": 0.008508941035697666,
-      "grad_norm": 0.6712137460708618,
       "learning_rate": 9.140576474687264e-05,
-      "loss": 0.4932,
       "step": 24
     },
     {
       "epoch": 0.008863480245518402,
-      "grad_norm": 0.7443585395812988,
       "learning_rate": 9.051132292283771e-05,
-      "loss": 0.4627,
       "step": 25
     },
     {
       "epoch": 0.008863480245518402,
-      "eval_loss": 0.7844333052635193,
-      "eval_runtime": 1.4306,
-      "eval_samples_per_second": 34.95,
-      "eval_steps_per_second": 9.087,
       "step": 25
-    },
-    {
-      "epoch": 0.009218019455339139,
-      "grad_norm": 0.6638553142547607,
-      "learning_rate": 8.957804998855866e-05,
-      "loss": 1.0456,
-      "step": 26
-    },
-    {
-      "epoch": 0.009572558665159875,
-      "grad_norm": 0.8564605116844177,
-      "learning_rate": 8.860696646428693e-05,
-      "loss": 1.0947,
-      "step": 27
-    },
-    {
-      "epoch": 0.009927097874980612,
-      "grad_norm": 0.7077105641365051,
-      "learning_rate": 8.759913421559902e-05,
-      "loss": 1.0131,
-      "step": 28
-    },
-    {
-      "epoch": 0.010281637084801348,
-      "grad_norm": 0.8055078387260437,
-      "learning_rate": 8.655565529226198e-05,
-      "loss": 0.949,
-      "step": 29
-    },
-    {
-      "epoch": 0.010636176294622083,
-      "grad_norm": 0.846217155456543,
-      "learning_rate": 8.547767072315835e-05,
-      "loss": 0.7614,
-      "step": 30
-    },
-    {
-      "epoch": 0.010990715504442819,
-      "grad_norm": 0.7835873365402222,
-      "learning_rate": 8.436635926858759e-05,
-      "loss": 0.7072,
-      "step": 31
-    },
-    {
-      "epoch": 0.011345254714263555,
-      "grad_norm": 0.6457473039627075,
-      "learning_rate": 8.322293613130917e-05,
-      "loss": 0.6011,
-      "step": 32
-    },
-    {
-      "epoch": 0.011699793924084292,
-      "grad_norm": 0.6326347589492798,
-      "learning_rate": 8.204865162773613e-05,
-      "loss": 0.7046,
-      "step": 33
-    },
-    {
-      "epoch": 0.012054333133905028,
-      "grad_norm": 0.6413611173629761,
-      "learning_rate": 8.084478982073247e-05,
-      "loss": 0.5783,
-      "step": 34
-    },
-    {
-      "epoch": 0.012408872343725764,
-      "grad_norm": 0.7243404388427734,
-      "learning_rate": 7.961266711550922e-05,
-      "loss": 0.4478,
-      "step": 35
-    },
-    {
-      "epoch": 0.0127634115535465,
-      "grad_norm": 0.565170168876648,
-      "learning_rate": 7.835363082015468e-05,
-      "loss": 0.4311,
-      "step": 36
-    },
-    {
-      "epoch": 0.013117950763367235,
-      "grad_norm": 0.6162042021751404,
-      "learning_rate": 7.706905767237288e-05,
-      "loss": 0.3916,
-      "step": 37
-    },
-    {
-      "epoch": 0.013472489973187972,
-      "grad_norm": 0.7090334892272949,
-      "learning_rate": 7.576035233404096e-05,
-      "loss": 0.7694,
-      "step": 38
-    },
-    {
-      "epoch": 0.013827029183008708,
-      "grad_norm": 0.7033197283744812,
-      "learning_rate": 7.442894585523218e-05,
-      "loss": 0.9693,
-      "step": 39
-    },
-    {
-      "epoch": 0.014181568392829445,
-      "grad_norm": 0.7213008999824524,
-      "learning_rate": 7.307629410938363e-05,
-      "loss": 0.8617,
-      "step": 40
-    },
-    {
-      "epoch": 0.014536107602650181,
-      "grad_norm": 0.6332206726074219,
-      "learning_rate": 7.170387620131993e-05,
-      "loss": 0.7386,
-      "step": 41
-    },
-    {
-      "epoch": 0.014890646812470917,
-      "grad_norm": 0.8468403220176697,
-      "learning_rate": 7.031319284987394e-05,
-      "loss": 0.7881,
-      "step": 42
-    },
-    {
-      "epoch": 0.015245186022291652,
-      "grad_norm": 0.7750101089477539,
-      "learning_rate": 6.890576474687263e-05,
-      "loss": 0.6851,
-      "step": 43
-    },
-    {
-      "epoch": 0.015599725232112388,
-      "grad_norm": 0.7253099679946899,
-      "learning_rate": 6.7483130894283e-05,
-      "loss": 0.6545,
-      "step": 44
-    },
-    {
-      "epoch": 0.015954264441933125,
-      "grad_norm": 0.717294454574585,
-      "learning_rate": 6.604684692133597e-05,
-      "loss": 0.6447,
-      "step": 45
-    },
-    {
-      "epoch": 0.016308803651753863,
-      "grad_norm": 0.6965174078941345,
-      "learning_rate": 6.459848338346861e-05,
-      "loss": 0.5076,
-      "step": 46
-    },
-    {
-      "epoch": 0.016663342861574598,
-      "grad_norm": 0.7776918411254883,
-      "learning_rate": 6.313962404494496e-05,
-      "loss": 0.5284,
-      "step": 47
-    },
-    {
-      "epoch": 0.017017882071395332,
-      "grad_norm": 0.9502537846565247,
-      "learning_rate": 6.167186414703289e-05,
-      "loss": 0.5736,
-      "step": 48
-    },
-    {
-      "epoch": 0.01737242128121607,
-      "grad_norm": 0.8803554177284241,
-      "learning_rate": 6.019680866363139e-05,
-      "loss": 0.576,
-      "step": 49
-    },
-    {
-      "epoch": 0.017726960491036805,
-      "grad_norm": 0.6308343410491943,
-      "learning_rate": 5.8716070546254966e-05,
-      "loss": 0.3936,
-      "step": 50
-    },
-    {
-      "epoch": 0.017726960491036805,
-      "eval_loss": 0.7665626406669617,
-      "eval_runtime": 1.431,
-      "eval_samples_per_second": 34.942,
-      "eval_steps_per_second": 9.085,
-      "step": 50
-    },
-    {
-      "epoch": 0.018081499700857543,
-      "grad_norm": 0.7196190357208252,
-      "learning_rate": 5.7231268960295e-05,
-      "loss": 0.96,
-      "step": 51
-    },
-    {
-      "epoch": 0.018436038910678278,
-      "grad_norm": 0.7429586052894592,
-      "learning_rate": 5.574402751448614e-05,
-      "loss": 0.9205,
-      "step": 52
-    },
-    {
-      "epoch": 0.018790578120499012,
-      "grad_norm": 0.7085890173912048,
-      "learning_rate": 5.425597248551387e-05,
-      "loss": 0.7904,
-      "step": 53
-    },
-    {
-      "epoch": 0.01914511733031975,
-      "grad_norm": 0.775666356086731,
-      "learning_rate": 5.2768731039705e-05,
-      "loss": 0.8087,
-      "step": 54
-    },
-    {
-      "epoch": 0.019499656540140485,
-      "grad_norm": 0.8202411532402039,
-      "learning_rate": 5.128392945374505e-05,
-      "loss": 0.7787,
-      "step": 55
-    },
-    {
-      "epoch": 0.019854195749961223,
-      "grad_norm": 0.7467792630195618,
-      "learning_rate": 4.980319133636863e-05,
-      "loss": 0.672,
-      "step": 56
-    },
-    {
-      "epoch": 0.020208734959781958,
-      "grad_norm": 0.756299614906311,
-      "learning_rate": 4.83281358529671e-05,
-      "loss": 0.6991,
-      "step": 57
-    },
-    {
-      "epoch": 0.020563274169602696,
-      "grad_norm": 0.7745821475982666,
-      "learning_rate": 4.686037595505507e-05,
-      "loss": 0.6683,
-      "step": 58
-    },
-    {
-      "epoch": 0.02091781337942343,
-      "grad_norm": 0.6686888933181763,
-      "learning_rate": 4.54015166165314e-05,
-      "loss": 0.5444,
-      "step": 59
-    },
-    {
-      "epoch": 0.021272352589244165,
-      "grad_norm": 0.6725859045982361,
-      "learning_rate": 4.395315307866405e-05,
-      "loss": 0.468,
-      "step": 60
-    },
-    {
-      "epoch": 0.021626891799064903,
-      "grad_norm": 0.6770166754722595,
-      "learning_rate": 4.2516869105717004e-05,
-      "loss": 0.4267,
-      "step": 61
-    },
-    {
-      "epoch": 0.021981431008885638,
-      "grad_norm": 0.8719369769096375,
-      "learning_rate": 4.109423525312738e-05,
-      "loss": 0.4629,
-      "step": 62
-    },
-    {
-      "epoch": 0.022335970218706376,
-      "grad_norm": 0.6566356420516968,
-      "learning_rate": 3.968680715012606e-05,
-      "loss": 0.5978,
-      "step": 63
-    },
-    {
-      "epoch": 0.02269050942852711,
-      "grad_norm": 0.7933331727981567,
-      "learning_rate": 3.829612379868006e-05,
-      "loss": 1.0035,
-      "step": 64
-    },
-    {
-      "epoch": 0.02304504863834785,
-      "grad_norm": 0.8843840956687927,
-      "learning_rate": 3.692370589061639e-05,
-      "loss": 0.9023,
-      "step": 65
-    },
-    {
-      "epoch": 0.023399587848168583,
-      "grad_norm": 1.0728343725204468,
-      "learning_rate": 3.557105414476782e-05,
-      "loss": 0.9255,
-      "step": 66
-    },
-    {
-      "epoch": 0.023754127057989318,
-      "grad_norm": 0.7340516448020935,
-      "learning_rate": 3.423964766595906e-05,
-      "loss": 0.7142,
-      "step": 67
-    },
-    {
-      "epoch": 0.024108666267810056,
-      "grad_norm": 0.8005227446556091,
-      "learning_rate": 3.293094232762715e-05,
-      "loss": 0.6483,
-      "step": 68
-    },
-    {
-      "epoch": 0.02446320547763079,
-      "grad_norm": 0.7076966762542725,
-      "learning_rate": 3.164636917984534e-05,
-      "loss": 0.6988,
-      "step": 69
-    },
-    {
-      "epoch": 0.02481774468745153,
-      "grad_norm": 0.6570345163345337,
-      "learning_rate": 3.0387332884490805e-05,
-      "loss": 0.636,
-      "step": 70
-    },
-    {
-      "epoch": 0.025172283897272264,
-      "grad_norm": 0.6902582049369812,
-      "learning_rate": 2.9155210179267546e-05,
-      "loss": 0.4957,
-      "step": 71
-    },
-    {
-      "epoch": 0.025526823107093,
-      "grad_norm": 0.7639642953872681,
-      "learning_rate": 2.7951348372263875e-05,
-      "loss": 0.5997,
-      "step": 72
-    },
-    {
-      "epoch": 0.025881362316913736,
-      "grad_norm": 0.7231664061546326,
-      "learning_rate": 2.677706386869083e-05,
-      "loss": 0.4678,
-      "step": 73
-    },
-    {
-      "epoch": 0.02623590152673447,
-      "grad_norm": 0.7691690325737,
-      "learning_rate": 2.5633640731412412e-05,
-      "loss": 0.5281,
-      "step": 74
-    },
-    {
-      "epoch": 0.02659044073655521,
-      "grad_norm": 0.6874270439147949,
-      "learning_rate": 2.4522329276841663e-05,
-      "loss": 0.307,
-      "step": 75
-    },
-    {
-      "epoch": 0.02659044073655521,
-      "eval_loss": 0.7375490665435791,
-      "eval_runtime": 1.43,
-      "eval_samples_per_second": 34.966,
-      "eval_steps_per_second": 9.091,
-      "step": 75
-    },
-    {
-      "epoch": 0.026944979946375944,
-      "grad_norm": 0.689096212387085,
-      "learning_rate": 2.3444344707738015e-05,
-      "loss": 0.9376,
-      "step": 76
-    },
-    {
-      "epoch": 0.027299519156196682,
-      "grad_norm": 0.7334891557693481,
-      "learning_rate": 2.2400865784401e-05,
-      "loss": 0.8911,
-      "step": 77
-    },
-    {
-      "epoch": 0.027654058366017416,
-      "grad_norm": 0.6938356757164001,
-      "learning_rate": 2.1393033535713093e-05,
-      "loss": 0.7595,
-      "step": 78
-    },
-    {
-      "epoch": 0.02800859757583815,
-      "grad_norm": 0.6203986406326294,
-      "learning_rate": 2.0421950011441354e-05,
-      "loss": 0.6434,
-      "step": 79
-    },
-    {
-      "epoch": 0.02836313678565889,
-      "grad_norm": 1.0269389152526855,
-      "learning_rate": 1.9488677077162295e-05,
-      "loss": 0.5729,
-      "step": 80
-    },
-    {
-      "epoch": 0.028717675995479624,
-      "grad_norm": 0.7017802596092224,
-      "learning_rate": 1.8594235253127375e-05,
-      "loss": 0.6952,
-      "step": 81
-    },
-    {
-      "epoch": 0.029072215205300362,
-      "grad_norm": 0.823606550693512,
-      "learning_rate": 1.77396025983391e-05,
-      "loss": 0.7631,
-      "step": 82
-    },
-    {
-      "epoch": 0.029426754415121097,
-      "grad_norm": 0.6583719253540039,
-      "learning_rate": 1.6925713641057904e-05,
-      "loss": 0.5065,
-      "step": 83
-    },
-    {
-      "epoch": 0.029781293624941835,
-      "grad_norm": 0.7148926854133606,
-      "learning_rate": 1.6153458356909176e-05,
-      "loss": 0.6016,
-      "step": 84
-    },
-    {
-      "epoch": 0.03013583283476257,
-      "grad_norm": 0.7244095206260681,
-      "learning_rate": 1.5423681195707997e-05,
-      "loss": 0.511,
-      "step": 85
-    },
-    {
-      "epoch": 0.030490372044583304,
-      "grad_norm": 0.7808502912521362,
-      "learning_rate": 1.4737180158065644e-05,
-      "loss": 0.4341,
-      "step": 86
-    },
-    {
-      "epoch": 0.030844911254404042,
-      "grad_norm": 0.6845284104347229,
-      "learning_rate": 1.4094705922787687e-05,
-      "loss": 0.438,
-      "step": 87
-    },
-    {
-      "epoch": 0.031199450464224777,
-      "grad_norm": 0.7536430358886719,
-      "learning_rate": 1.3496961026017687e-05,
-      "loss": 0.7226,
-      "step": 88
-    },
-    {
-      "epoch": 0.03155398967404551,
-      "grad_norm": 0.7071179747581482,
-      "learning_rate": 1.2944599093024267e-05,
-      "loss": 0.8611,
-      "step": 89
-    },
-    {
-      "epoch": 0.03190852888386625,
-      "grad_norm": 0.7080587148666382,
-      "learning_rate": 1.2438224123471442e-05,
-      "loss": 0.7928,
-      "step": 90
-    },
-    {
-      "epoch": 0.03226306809368699,
-      "grad_norm": 0.8562609553337097,
-      "learning_rate": 1.1978389830953907e-05,
-      "loss": 0.8498,
-      "step": 91
-    },
-    {
-      "epoch": 0.032617607303507726,
-      "grad_norm": 0.7281271815299988,
-      "learning_rate": 1.1565599037519316e-05,
-      "loss": 0.7436,
-      "step": 92
-    },
-    {
-      "epoch": 0.03297214651332846,
-      "grad_norm": 0.7492510676383972,
-      "learning_rate": 1.1200303123839742e-05,
-      "loss": 0.6069,
-      "step": 93
-    },
-    {
-      "epoch": 0.033326685723149195,
-      "grad_norm": 0.7654992938041687,
-      "learning_rate": 1.088290153563358e-05,
-      "loss": 0.6479,
-      "step": 94
-    },
-    {
-      "epoch": 0.03368122493296993,
-      "grad_norm": 0.8193288445472717,
-      "learning_rate": 1.0613741346877497e-05,
-      "loss": 0.6431,
-      "step": 95
-    },
-    {
-      "epoch": 0.034035764142790664,
-      "grad_norm": 0.9601457715034485,
-      "learning_rate": 1.0393116880286118e-05,
-      "loss": 0.6555,
-      "step": 96
-    },
-    {
-      "epoch": 0.0343903033526114,
-      "grad_norm": 0.820478081703186,
-      "learning_rate": 1.0221269385474488e-05,
-      "loss": 0.5409,
-      "step": 97
-    },
-    {
-      "epoch": 0.03474484256243214,
-      "grad_norm": 0.8726357817649841,
-      "learning_rate": 1.0098386775155147e-05,
-      "loss": 0.4809,
-      "step": 98
-    },
-    {
-      "epoch": 0.03509938177225287,
-      "grad_norm": 0.7600851058959961,
-      "learning_rate": 1.0024603419658329e-05,
-      "loss": 0.4056,
-      "step": 99
-    },
-    {
-      "epoch": 0.03545392098207361,
-      "grad_norm": 0.6082343459129333,
-      "learning_rate": 1e-05,
-      "loss": 0.2769,
-      "step": 100
-    },
-    {
-      "epoch": 0.03545392098207361,
-      "eval_loss": 0.7247900366783142,
-      "eval_runtime": 1.4297,
-      "eval_samples_per_second": 34.973,
-      "eval_steps_per_second": 9.093,
-      "step": 100
     }
   ],
   "logging_steps": 1,
@@ -770,12 +221,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.1249873143390536e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.7849562168121338,
+  "best_model_checkpoint": "miner_id_24/checkpoint-25",
+  "epoch": 0.008863480245518402,
   "eval_steps": 25,
+  "global_step": 25,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.0003545392098207361,
+      "grad_norm": 4.848531246185303,
       "learning_rate": 2e-05,
       "loss": 5.0876,
       "step": 1
     {
       "epoch": 0.0003545392098207361,
       "eval_loss": 6.216423511505127,
+      "eval_runtime": 1.4346,
+      "eval_samples_per_second": 34.853,
+      "eval_steps_per_second": 9.062,
       "step": 1
     },
     {
       "epoch": 0.0007090784196414722,
+      "grad_norm": 5.189554691314697,
       "learning_rate": 4e-05,
       "loss": 4.9835,
       "step": 2
     },
     {
       "epoch": 0.0010636176294622083,
+      "grad_norm": 5.395487308502197,
       "learning_rate": 6e-05,
+      "loss": 5.0002,
       "step": 3
     },
     {
       "epoch": 0.0014181568392829444,
+      "grad_norm": 5.79182767868042,
       "learning_rate": 8e-05,
+      "loss": 4.815,
       "step": 4
     },
     {
       "epoch": 0.0017726960491036806,
+      "grad_norm": 5.924882411956787,
       "learning_rate": 0.0001,
+      "loss": 3.5888,
       "step": 5
     },
     {
       "epoch": 0.0021272352589244165,
+      "grad_norm": 5.22299861907959,
       "learning_rate": 9.997539658034168e-05,
+      "loss": 2.5276,
       "step": 6
     },
     {
       "epoch": 0.002481774468745153,
+      "grad_norm": 3.220250129699707,
       "learning_rate": 9.990161322484486e-05,
+      "loss": 1.5851,
       "step": 7
     },
     {
       "epoch": 0.002836313678565889,
+      "grad_norm": 1.6759289503097534,
       "learning_rate": 9.977873061452552e-05,
+      "loss": 1.2539,
       "step": 8
     },
     {
       "epoch": 0.003190852888386625,
+      "grad_norm": 1.6135218143463135,
       "learning_rate": 9.96068831197139e-05,
+      "loss": 1.0919,
       "step": 9
     },
     {
       "epoch": 0.003545392098207361,
+      "grad_norm": 1.962496042251587,
       "learning_rate": 9.938625865312251e-05,
+      "loss": 0.8399,
       "step": 10
     },
     {
       "epoch": 0.003899931308028097,
+      "grad_norm": 2.1563808917999268,
       "learning_rate": 9.911709846436641e-05,
+      "loss": 0.7126,
       "step": 11
     },
     {
       "epoch": 0.004254470517848833,
+      "grad_norm": 1.3578228950500488,
       "learning_rate": 9.879969687616027e-05,
+      "loss": 0.7229,
       "step": 12
     },
     {
       "epoch": 0.004609009727669569,
+      "grad_norm": 1.3786768913269043,
       "learning_rate": 9.84344009624807e-05,
+      "loss": 0.8936,
       "step": 13
     },
     {
       "epoch": 0.004963548937490306,
+      "grad_norm": 1.6054021120071411,
       "learning_rate": 9.80216101690461e-05,
+      "loss": 1.1911,
       "step": 14
     },
     {
       "epoch": 0.005318088147311041,
+      "grad_norm": 0.9699863791465759,
       "learning_rate": 9.756177587652856e-05,
+      "loss": 1.0015,
       "step": 15
     },
     {
       "epoch": 0.005672627357131778,
+      "grad_norm": 1.0598102807998657,
       "learning_rate": 9.705540090697575e-05,
       "loss": 1.1119,
       "step": 16
     },
     {
       "epoch": 0.006027166566952514,
+      "grad_norm": 1.0990904569625854,
       "learning_rate": 9.650303897398232e-05,
+      "loss": 0.8657,
       "step": 17
     },
     {
       "epoch": 0.00638170577677325,
+      "grad_norm": 0.9300882816314697,
       "learning_rate": 9.590529407721231e-05,
+      "loss": 0.8144,
       "step": 18
     },
     {
       "epoch": 0.006736244986593986,
+      "grad_norm": 0.7585983276367188,
       "learning_rate": 9.526281984193436e-05,
+      "loss": 0.7869,
       "step": 19
     },
     {
       "epoch": 0.007090784196414722,
+      "grad_norm": 0.83787602186203,
       "learning_rate": 9.4576318804292e-05,
+      "loss": 0.7916,
       "step": 20
     },
     {
       "epoch": 0.007445323406235459,
+      "grad_norm": 0.7473427653312683,
       "learning_rate": 9.384654164309083e-05,
       "loss": 0.6681,
       "step": 21
     },
     {
       "epoch": 0.007799862616056194,
+      "grad_norm": 0.5894711017608643,
       "learning_rate": 9.30742863589421e-05,
+      "loss": 0.675,
       "step": 22
     },
     {
       "epoch": 0.008154401825876931,
+      "grad_norm": 0.715570867061615,
       "learning_rate": 9.226039740166091e-05,
+      "loss": 0.6853,
       "step": 23
     },
     {
       "epoch": 0.008508941035697666,
+      "grad_norm": 0.6784505844116211,
       "learning_rate": 9.140576474687264e-05,
+      "loss": 0.4911,
       "step": 24
     },
     {
       "epoch": 0.008863480245518402,
+      "grad_norm": 0.7639828324317932,
       "learning_rate": 9.051132292283771e-05,
+      "loss": 0.4635,
       "step": 25
     },
     {
       "epoch": 0.008863480245518402,
+      "eval_loss": 0.7849562168121338,
+      "eval_runtime": 1.4348,
+      "eval_samples_per_second": 34.849,
+      "eval_steps_per_second": 9.061,
       "step": 25
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": false
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.812907666370724e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e13ebf621f3ae7801a8fa1b74c6bbd62a569669ff2bc7e880f5897ba3c972d81
 size 6840

 version https://git-lfs.github.com/spec/v1
+oid sha256:05bf92cdc9776646a10f781dcc857c6462d8f9a3727f2361008d8e455efc9baa
 size 6840