Training in progress, step 2899, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +221 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a96170c4566c1e7efa993538ecb2474b7056c80c01787e9d312ce31d75111369
 size 2145944

 version https://git-lfs.github.com/spec/v1
+oid sha256:c6ec0ef0bd58107442e53f9af82589272dd28dae4b201ef6aa9c49f0c83bfc1c
 size 2145944

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:635f9699d60977f7f49fef0fac4d8dd2e9540f390cee9cca0ae8d57bdec47c46
 size 4310020

 version https://git-lfs.github.com/spec/v1
+oid sha256:e9965916e24cdc2427889fb3540f08506a8da344166dce118b8ba199ea51941c
 size 4310020

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9966c454e255711c675f388ff89a77f36c50534180a271dc2d40e7f43b870bf
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:4242e9d0ac065fef7739bb68fca0ee428f925673e7726dd722595d4147173376
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:237b866b1fce3ebf6f30679e5de802610141e1fcea25e04865362188175cac93
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a35fbe8edc635ebc2790ec5a9bb024f4b9252d94d57cb9bc113900a36047bd39
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9893066574680924,
   "eval_steps": 500,
-  "global_step": 2868,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -20083,6 +20083,223 @@
       "learning_rate": 3.0263108131095566e-08,
       "loss": 9.5316,
       "step": 2868
     }
   ],
   "logging_steps": 1,
@@ -20097,12 +20314,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 595349320237056.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0,
   "eval_steps": 500,
+  "global_step": 2899,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.0263108131095566e-08,
       "loss": 9.5316,
       "step": 2868
+    },
+    {
+      "epoch": 0.9896516040013797,
+      "grad_norm": 1.1785590648651123,
+      "learning_rate": 2.8342322313085202e-08,
+      "loss": 9.4722,
+      "step": 2869
+    },
+    {
+      "epoch": 0.9899965505346672,
+      "grad_norm": 1.2561894655227661,
+      "learning_rate": 2.6484489673084257e-08,
+      "loss": 9.5575,
+      "step": 2870
+    },
+    {
+      "epoch": 0.9903414970679545,
+      "grad_norm": 1.1631916761398315,
+      "learning_rate": 2.4689612551553888e-08,
+      "loss": 9.5365,
+      "step": 2871
+    },
+    {
+      "epoch": 0.9906864436012418,
+      "grad_norm": 1.2572925090789795,
+      "learning_rate": 2.2957693209635368e-08,
+      "loss": 9.5596,
+      "step": 2872
+    },
+    {
+      "epoch": 0.9910313901345291,
+      "grad_norm": 1.324566125869751,
+      "learning_rate": 2.1288733829161188e-08,
+      "loss": 9.4521,
+      "step": 2873
+    },
+    {
+      "epoch": 0.9913763366678164,
+      "grad_norm": 1.3486089706420898,
+      "learning_rate": 1.9682736512660617e-08,
+      "loss": 9.4974,
+      "step": 2874
+    },
+    {
+      "epoch": 0.9917212832011039,
+      "grad_norm": 1.174851417541504,
+      "learning_rate": 1.813970328331527e-08,
+      "loss": 9.5033,
+      "step": 2875
+    },
+    {
+      "epoch": 0.9920662297343912,
+      "grad_norm": 1.2707513570785522,
+      "learning_rate": 1.6659636085020192e-08,
+      "loss": 9.4723,
+      "step": 2876
+    },
+    {
+      "epoch": 0.9924111762676785,
+      "grad_norm": 1.474165678024292,
+      "learning_rate": 1.5242536782317242e-08,
+      "loss": 9.4423,
+      "step": 2877
+    },
+    {
+      "epoch": 0.9927561228009658,
+      "grad_norm": 1.3797553777694702,
+      "learning_rate": 1.388840716045059e-08,
+      "loss": 9.4966,
+      "step": 2878
+    },
+    {
+      "epoch": 0.9931010693342532,
+      "grad_norm": 1.326752781867981,
+      "learning_rate": 1.2597248925311222e-08,
+      "loss": 9.5313,
+      "step": 2879
+    },
+    {
+      "epoch": 0.9934460158675406,
+      "grad_norm": 1.3259997367858887,
+      "learning_rate": 1.1369063703475791e-08,
+      "loss": 9.4992,
+      "step": 2880
+    },
+    {
+      "epoch": 0.9937909624008279,
+      "grad_norm": 1.3481284379959106,
+      "learning_rate": 1.0203853042184407e-08,
+      "loss": 9.4431,
+      "step": 2881
+    },
+    {
+      "epoch": 0.9941359089341152,
+      "grad_norm": 1.3297460079193115,
+      "learning_rate": 9.101618409340651e-09,
+      "loss": 9.4599,
+      "step": 2882
+    },
+    {
+      "epoch": 0.9944808554674025,
+      "grad_norm": 1.3250812292099,
+      "learning_rate": 8.06236119351711e-09,
+      "loss": 9.4787,
+      "step": 2883
+    },
+    {
+      "epoch": 0.9948258020006899,
+      "grad_norm": 1.3159713745117188,
+      "learning_rate": 7.086082703949837e-09,
+      "loss": 9.4653,
+      "step": 2884
+    },
+    {
+      "epoch": 0.9951707485339772,
+      "grad_norm": 1.3725025653839111,
+      "learning_rate": 6.172784170532797e-09,
+      "loss": 9.5064,
+      "step": 2885
+    },
+    {
+      "epoch": 0.9955156950672646,
+      "grad_norm": 1.3856096267700195,
+      "learning_rate": 5.3224667438123155e-09,
+      "loss": 9.4516,
+      "step": 2886
+    },
+    {
+      "epoch": 0.9958606416005519,
+      "grad_norm": 1.391958475112915,
+      "learning_rate": 4.53513149500373e-09,
+      "loss": 9.4719,
+      "step": 2887
+    },
+    {
+      "epoch": 0.9962055881338393,
+      "grad_norm": 1.5442167520523071,
+      "learning_rate": 3.810779415974741e-09,
+      "loss": 9.4641,
+      "step": 2888
+    },
+    {
+      "epoch": 0.9965505346671266,
+      "grad_norm": 1.4803158044815063,
+      "learning_rate": 3.1494114192509585e-09,
+      "loss": 9.4186,
+      "step": 2889
+    },
+    {
+      "epoch": 0.9968954812004139,
+      "grad_norm": 1.4997535943984985,
+      "learning_rate": 2.5510283379992504e-09,
+      "loss": 9.4915,
+      "step": 2890
+    },
+    {
+      "epoch": 0.9972404277337013,
+      "grad_norm": 1.5729633569717407,
+      "learning_rate": 2.0156309260610517e-09,
+      "loss": 9.423,
+      "step": 2891
+    },
+    {
+      "epoch": 0.9975853742669886,
+      "grad_norm": 1.605699062347412,
+      "learning_rate": 1.5432198579079516e-09,
+      "loss": 9.4781,
+      "step": 2892
+    },
+    {
+      "epoch": 0.997930320800276,
+      "grad_norm": 1.4182220697402954,
+      "learning_rate": 1.1337957286805534e-09,
+      "loss": 9.453,
+      "step": 2893
+    },
+    {
+      "epoch": 0.9982752673335633,
+      "grad_norm": 1.5460253953933716,
+      "learning_rate": 7.873590541551679e-10,
+      "loss": 9.5107,
+      "step": 2894
+    },
+    {
+      "epoch": 0.9986202138668506,
+      "grad_norm": 1.456726312637329,
+      "learning_rate": 5.039102707715682e-10,
+      "loss": 9.3826,
+      "step": 2895
+    },
+    {
+      "epoch": 0.9989651604001379,
+      "grad_norm": 1.55360746383667,
+      "learning_rate": 2.8344973560523456e-10,
+      "loss": 9.394,
+      "step": 2896
+    },
+    {
+      "epoch": 0.9993101069334254,
+      "grad_norm": 1.6202263832092285,
+      "learning_rate": 1.2597772639511006e-10,
+      "loss": 9.4164,
+      "step": 2897
+    },
+    {
+      "epoch": 0.9996550534667127,
+      "grad_norm": 1.6138488054275513,
+      "learning_rate": 3.1494441515844684e-11,
+      "loss": 9.3771,
+      "step": 2898
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 1.670608639717102,
+      "learning_rate": 0.0,
+      "loss": 9.3193,
+      "step": 2899
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 601758459101184.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null