joelniklaus committed on Apr 12, 2023

Commit

9076e22

•

1 Parent(s): dbccfdf

Training in progress, step 450000

Browse files

Files changed (17) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/rng_state_4.pth +1 -1
last-checkpoint/rng_state_5.pth +1 -1
last-checkpoint/rng_state_6.pth +1 -1
last-checkpoint/rng_state_7.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +311 -3
last-checkpoint/training_args.bin +2 -2
pytorch_model.bin +1 -1
runs/Apr07_17-56-12_t1v-n-7a44a9fa-w-0/1680890667.0265427/events.out.tfevents.1680890667.t1v-n-7a44a9fa-w-0.506706.1 +3 -0
runs/Apr07_17-56-12_t1v-n-7a44a9fa-w-0/events.out.tfevents.1680890667.t1v-n-7a44a9fa-w-0.506706.0 +3 -0
training_args.bin +2 -2

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4adbd51f12f37875580b0dc03a54ab186184f725b7059c35d96ffc874e71eecb
 size 2693742553

 version https://git-lfs.github.com/spec/v1
+oid sha256:8d9fd74a3d7db813f8faa7594f4fc6b6d334c4b2c5b74ed5d009587c72e18d9a
 size 2693742553

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ead6458d093c2c299dbf7a5f0fafd5c3d00c89c8e8d26b98e4f4fbf42560dad2
 size 1346893675

 version https://git-lfs.github.com/spec/v1
+oid sha256:a8a5e1b62dd95881e02c04a76de9ac1f4faee2bb1833f6d1760bd79e8781b461
 size 1346893675

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea2f3eb2c168189aedcdcd44a88bc11907a5ecbe1197c3bd115e4a03797f0f3d
 size 13611

 version https://git-lfs.github.com/spec/v1
+oid sha256:10d8c5bd5065fbc931ce5e630117e1e3c06957d862169b69f98034128a7eb278
 size 13611

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea2f3eb2c168189aedcdcd44a88bc11907a5ecbe1197c3bd115e4a03797f0f3d
 size 13611

 version https://git-lfs.github.com/spec/v1
+oid sha256:10d8c5bd5065fbc931ce5e630117e1e3c06957d862169b69f98034128a7eb278
 size 13611

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea2f3eb2c168189aedcdcd44a88bc11907a5ecbe1197c3bd115e4a03797f0f3d
 size 13611

 version https://git-lfs.github.com/spec/v1
+oid sha256:10d8c5bd5065fbc931ce5e630117e1e3c06957d862169b69f98034128a7eb278
 size 13611

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea2f3eb2c168189aedcdcd44a88bc11907a5ecbe1197c3bd115e4a03797f0f3d
 size 13611

 version https://git-lfs.github.com/spec/v1
+oid sha256:10d8c5bd5065fbc931ce5e630117e1e3c06957d862169b69f98034128a7eb278
 size 13611

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea2f3eb2c168189aedcdcd44a88bc11907a5ecbe1197c3bd115e4a03797f0f3d
 size 13611

 version https://git-lfs.github.com/spec/v1
+oid sha256:10d8c5bd5065fbc931ce5e630117e1e3c06957d862169b69f98034128a7eb278
 size 13611

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea2f3eb2c168189aedcdcd44a88bc11907a5ecbe1197c3bd115e4a03797f0f3d
 size 13611

 version https://git-lfs.github.com/spec/v1
+oid sha256:10d8c5bd5065fbc931ce5e630117e1e3c06957d862169b69f98034128a7eb278
 size 13611

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea2f3eb2c168189aedcdcd44a88bc11907a5ecbe1197c3bd115e4a03797f0f3d
 size 13611

 version https://git-lfs.github.com/spec/v1
+oid sha256:10d8c5bd5065fbc931ce5e630117e1e3c06957d862169b69f98034128a7eb278
 size 13611

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea2f3eb2c168189aedcdcd44a88bc11907a5ecbe1197c3bd115e4a03797f0f3d
 size 13611

 version https://git-lfs.github.com/spec/v1
+oid sha256:10d8c5bd5065fbc931ce5e630117e1e3c06957d862169b69f98034128a7eb278
 size 13611

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e4ecef8b58c710458716a0153f8519567dd2a15c4728bc445f0af4d3fb15782
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:78e735efa7e40e0dd22dcac5cb3724b0cbe120563d603ea4b62f22b0f40fc602
 size 623

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.094734,
-  "global_step": 400000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2470,11 +2470,319 @@
       "eval_samples_per_second": 275.486,
       "eval_steps_per_second": 4.353,
       "step": 400000
     }
   ],
   "max_steps": 1000000,
   "num_train_epochs": 9223372036854775807,
-  "total_flos": 2.385998169502227e+19,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.144733,
+  "global_step": 450000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 275.486,
       "eval_steps_per_second": 4.353,
       "step": 400000
+    },
+    {
+      "epoch": 1.1,
+      "learning_rate": 6.993324133116726e-05,
+      "loss": 0.8315,
+      "step": 401000
+    },
+    {
+      "epoch": 1.1,
+      "learning_rate": 6.978149344295242e-05,
+      "loss": 0.786,
+      "step": 402000
+    },
+    {
+      "epoch": 1.1,
+      "learning_rate": 6.962952922749457e-05,
+      "loss": 0.9338,
+      "step": 403000
+    },
+    {
+      "epoch": 1.1,
+      "learning_rate": 6.947735034665002e-05,
+      "loss": 0.9648,
+      "step": 404000
+    },
+    {
+      "epoch": 1.1,
+      "learning_rate": 6.932495846462261e-05,
+      "loss": 0.8149,
+      "step": 405000
+    },
+    {
+      "epoch": 1.1,
+      "learning_rate": 6.917235524794558e-05,
+      "loss": 0.7549,
+      "step": 406000
+    },
+    {
+      "epoch": 1.1,
+      "learning_rate": 6.901954236546323e-05,
+      "loss": 0.7084,
+      "step": 407000
+    },
+    {
+      "epoch": 1.1,
+      "learning_rate": 6.886652148831279e-05,
+      "loss": 0.7488,
+      "step": 408000
+    },
+    {
+      "epoch": 1.1,
+      "learning_rate": 6.871329428990602e-05,
+      "loss": 0.7645,
+      "step": 409000
+    },
+    {
+      "epoch": 1.1,
+      "learning_rate": 6.855986244591104e-05,
+      "loss": 0.7303,
+      "step": 410000
+    },
+    {
+      "epoch": 1.11,
+      "learning_rate": 6.840622763423391e-05,
+      "loss": 0.6498,
+      "step": 411000
+    },
+    {
+      "epoch": 1.11,
+      "learning_rate": 6.825239153500029e-05,
+      "loss": 0.7127,
+      "step": 412000
+    },
+    {
+      "epoch": 1.11,
+      "learning_rate": 6.809835583053715e-05,
+      "loss": 0.8216,
+      "step": 413000
+    },
+    {
+      "epoch": 1.11,
+      "learning_rate": 6.794412220535426e-05,
+      "loss": 0.817,
+      "step": 414000
+    },
+    {
+      "epoch": 1.11,
+      "learning_rate": 6.778969234612584e-05,
+      "loss": 0.7474,
+      "step": 415000
+    },
+    {
+      "epoch": 1.11,
+      "learning_rate": 6.763506794167208e-05,
+      "loss": 0.7504,
+      "step": 416000
+    },
+    {
+      "epoch": 1.11,
+      "learning_rate": 6.748025068294067e-05,
+      "loss": 0.8277,
+      "step": 417000
+    },
+    {
+      "epoch": 1.11,
+      "learning_rate": 6.732524226298841e-05,
+      "loss": 0.9038,
+      "step": 418000
+    },
+    {
+      "epoch": 1.11,
+      "learning_rate": 6.71700443769625e-05,
+      "loss": 0.9412,
+      "step": 419000
+    },
+    {
+      "epoch": 1.11,
+      "learning_rate": 6.701465872208216e-05,
+      "loss": 0.8328,
+      "step": 420000
+    },
+    {
+      "epoch": 1.12,
+      "learning_rate": 6.685908699762002e-05,
+      "loss": 0.7013,
+      "step": 421000
+    },
+    {
+      "epoch": 1.12,
+      "learning_rate": 6.670333090488356e-05,
+      "loss": 0.8064,
+      "step": 422000
+    },
+    {
+      "epoch": 1.12,
+      "learning_rate": 6.654739214719641e-05,
+      "loss": 0.8228,
+      "step": 423000
+    },
+    {
+      "epoch": 1.12,
+      "learning_rate": 6.639127242987988e-05,
+      "loss": 0.831,
+      "step": 424000
+    },
+    {
+      "epoch": 1.12,
+      "learning_rate": 6.623497346023418e-05,
+      "loss": 0.606,
+      "step": 425000
+    },
+    {
+      "epoch": 1.12,
+      "learning_rate": 6.607849694751977e-05,
+      "loss": 0.596,
+      "step": 426000
+    },
+    {
+      "epoch": 1.12,
+      "learning_rate": 6.592184460293877e-05,
+      "loss": 0.7339,
+      "step": 427000
+    },
+    {
+      "epoch": 1.12,
+      "learning_rate": 6.576501813961609e-05,
+      "loss": 0.8095,
+      "step": 428000
+    },
+    {
+      "epoch": 1.12,
+      "learning_rate": 6.56080192725808e-05,
+      "loss": 0.8368,
+      "step": 429000
+    },
+    {
+      "epoch": 1.12,
+      "learning_rate": 6.545084971874738e-05,
+      "loss": 0.6778,
+      "step": 430000
+    },
+    {
+      "epoch": 1.13,
+      "learning_rate": 6.529351119689688e-05,
+      "loss": 0.602,
+      "step": 431000
+    },
+    {
+      "epoch": 1.13,
+      "learning_rate": 6.513600542765817e-05,
+      "loss": 0.8553,
+      "step": 432000
+    },
+    {
+      "epoch": 1.13,
+      "learning_rate": 6.497833413348909e-05,
+      "loss": 0.9017,
+      "step": 433000
+    },
+    {
+      "epoch": 1.13,
+      "learning_rate": 6.48204990386577e-05,
+      "loss": 0.8937,
+      "step": 434000
+    },
+    {
+      "epoch": 1.13,
+      "learning_rate": 6.466250186922325e-05,
+      "loss": 0.7829,
+      "step": 435000
+    },
+    {
+      "epoch": 1.13,
+      "learning_rate": 6.450434435301751e-05,
+      "loss": 0.7589,
+      "step": 436000
+    },
+    {
+      "epoch": 1.13,
+      "learning_rate": 6.43460282196257e-05,
+      "loss": 0.893,
+      "step": 437000
+    },
+    {
+      "epoch": 1.13,
+      "learning_rate": 6.418755520036775e-05,
+      "loss": 0.8418,
+      "step": 438000
+    },
+    {
+      "epoch": 1.13,
+      "learning_rate": 6.402892702827916e-05,
+      "loss": 0.8263,
+      "step": 439000
+    },
+    {
+      "epoch": 1.13,
+      "learning_rate": 6.387014543809223e-05,
+      "loss": 0.6969,
+      "step": 440000
+    },
+    {
+      "epoch": 1.14,
+      "learning_rate": 6.371121216621698e-05,
+      "loss": 0.5887,
+      "step": 441000
+    },
+    {
+      "epoch": 1.14,
+      "learning_rate": 6.355212895072223e-05,
+      "loss": 0.723,
+      "step": 442000
+    },
+    {
+      "epoch": 1.14,
+      "learning_rate": 6.339289753131649e-05,
+      "loss": 0.7757,
+      "step": 443000
+    },
+    {
+      "epoch": 1.14,
+      "learning_rate": 6.323351964932908e-05,
+      "loss": 0.7438,
+      "step": 444000
+    },
+    {
+      "epoch": 1.14,
+      "learning_rate": 6.307399704769099e-05,
+      "loss": 0.6629,
+      "step": 445000
+    },
+    {
+      "epoch": 1.14,
+      "learning_rate": 6.291433147091583e-05,
+      "loss": 0.6943,
+      "step": 446000
+    },
+    {
+      "epoch": 1.14,
+      "learning_rate": 6.275452466508077e-05,
+      "loss": 0.769,
+      "step": 447000
+    },
+    {
+      "epoch": 1.14,
+      "learning_rate": 6.259457837780742e-05,
+      "loss": 0.8287,
+      "step": 448000
+    },
+    {
+      "epoch": 1.14,
+      "learning_rate": 6.243449435824276e-05,
+      "loss": 0.8262,
+      "step": 449000
+    },
+    {
+      "epoch": 1.14,
+      "learning_rate": 6.227427435703997e-05,
+      "loss": 0.7464,
+      "step": 450000
+    },
+    {
+      "epoch": 1.14,
+      "eval_loss": 0.45768678188323975,
+      "eval_runtime": 28.0661,
+      "eval_samples_per_second": 178.151,
+      "eval_steps_per_second": 2.815,
+      "step": 450000
     }
   ],
   "max_steps": 1000000,
   "num_train_epochs": 9223372036854775807,
+  "total_flos": 2.684243932962462e+19,
   "trial_name": null,
   "trial_params": null
 }

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c0b02673b13f0fc59a49044f6f1aa1cfe1a6854d2087f76c0de0776564c78579
-size 3439

 version https://git-lfs.github.com/spec/v1
+oid sha256:f147202664426d6d9fdfd0b21608081a18482651bb03dac1298bc70b03736e88
+size 3503

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ead6458d093c2c299dbf7a5f0fafd5c3d00c89c8e8d26b98e4f4fbf42560dad2
 size 1346893675

 version https://git-lfs.github.com/spec/v1
+oid sha256:a8a5e1b62dd95881e02c04a76de9ac1f4faee2bb1833f6d1760bd79e8781b461
 size 1346893675

runs/Apr07_17-56-12_t1v-n-7a44a9fa-w-0/1680890667.0265427/events.out.tfevents.1680890667.t1v-n-7a44a9fa-w-0.506706.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cdf326db418547e90b063718701d268428b727a818a8711e385b7d51bb6ee3de
+size 5494

runs/Apr07_17-56-12_t1v-n-7a44a9fa-w-0/events.out.tfevents.1680890667.t1v-n-7a44a9fa-w-0.506706.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d255afb91fa33400a4d29ba888bd907306caeb39a18e8b1914abc3ef5beccab0
+size 12106

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c0b02673b13f0fc59a49044f6f1aa1cfe1a6854d2087f76c0de0776564c78579
-size 3439

 version https://git-lfs.github.com/spec/v1
+oid sha256:f147202664426d6d9fdfd0b21608081a18482651bb03dac1298bc70b03736e88
+size 3503