huggingartists

Browse files

Files changed (10) hide show

README.md +3 -3
config.json +1 -1
evaluation.txt +1 -1
flax_model.msgpack +1 -1
optimizer.pt +1 -1
pytorch_model.bin +1 -1
rng_state.pth +2 -2
scheduler.pt +1 -1
trainer_state.json +128 -6
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/morgenshtern")
 ```
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/22rvzlbq/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on MORGENSHTERN's lyrics.
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/320wuvzx) for full transparency and reproducibility.
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/320wuvzx/artifacts) is logged and versioned.
 ## How to use

 dataset = load_dataset("huggingartists/morgenshtern")
 ```
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/19ghjsta/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on MORGENSHTERN's lyrics.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/3the5qdy) for full transparency and reproducibility.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/3the5qdy/artifacts) is logged and versioned.
 ## How to use

config.json CHANGED Viewed

@@ -35,7 +35,7 @@
     }
   },
   "torch_dtype": "float32",
-  "transformers_version": "4.10.2",
   "use_cache": true,
   "vocab_size": 50257
 }

     }
   },
   "torch_dtype": "float32",
+  "transformers_version": "4.11.2",
   "use_cache": true,
   "vocab_size": 50257
 }

evaluation.txt CHANGED Viewed

	@@ -1 +1 @@
1	- {"eval_loss": 0.9282976984977722, "eval_runtime": 7.0681, "eval_samples_per_second": 20.656, "eval_steps_per_second": 2.688, "epoch": 3.0}


1	+ {"eval_loss": 0.8179315328598022, "eval_runtime": 7.3257, "eval_samples_per_second": 21.158, "eval_steps_per_second": 2.73, "epoch": 3.0}

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:290b054aa26a75472ea89462728e1fbe01bb1cbbb7bf90d372ad250db6831111
 size 497764120

 version https://git-lfs.github.com/spec/v1
+oid sha256:9bfe735b97d1edf92f1881ff486f7bbdb5e1c909ebc2fe3797e20c998d462eef
 size 497764120

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfb92c5db45ae57ec8704270fd02cd24309911146a62e4ffdfe187cdfacc2cca
 size 995604017

 version https://git-lfs.github.com/spec/v1
+oid sha256:7c09c40bf4dfd5fd306d702098d94d3bbdc47999df4e071987a459220d393d0d
 size 995604017

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75dfc06c83e4ec2c68737881be276359157fac03863eda5033a21d9e80f92ef9
 size 510403817

 version https://git-lfs.github.com/spec/v1
+oid sha256:e79396262fb67544ae8b77d0955613d7d836ace7dac0bfc78d519268a6d4270c
 size 510403817

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ffc74e881d44215266a1c99f799520e15420c54f0cc889ce0ad363a9b0849b03
-size 14567

 version https://git-lfs.github.com/spec/v1
+oid sha256:ad6fe78f0f512530d4eb0b75148f257776f4270926c8eeb3605dc42ed450ad9f
+size 14503

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7b9d873c8cacf8459303fcc5fcff12d277bec46420342ef76b13541e314fd254
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:592ef97b2b907edd507cff14ca8b3cd3c3eaddc58fa33aedd4b1cf49372e69c1
 size 623

trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-  "best_metric": 0.9282976984977722,
-  "best_model_checkpoint": "output/morgenshtern/checkpoint-196",
-  "epoch": 2.0,
-  "global_step": 196,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -256,11 +256,133 @@
       "eval_samples_per_second": 20.696,
       "eval_steps_per_second": 2.693,
       "step": 196
     }
   ],
-  "max_steps": 294,
   "num_train_epochs": 3,
-  "total_flos": 203546492928000.0,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 0.8179315328598022,
+  "best_model_checkpoint": "output/morgenshtern/checkpoint-291",
+  "epoch": 3.0,
+  "global_step": 291,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 20.696,
       "eval_steps_per_second": 2.693,
       "step": 196
+    },
+    {
+      "epoch": 2.06,
+      "learning_rate": 2.8937315607040204e-06,
+      "loss": 1.0557,
+      "step": 200
+    },
+    {
+      "epoch": 2.11,
+      "learning_rate": 6.931910561541759e-06,
+      "loss": 0.9196,
+      "step": 205
+    },
+    {
+      "epoch": 2.16,
+      "learning_rate": 1.2583732714275138e-05,
+      "loss": 0.9577,
+      "step": 210
+    },
+    {
+      "epoch": 2.22,
+      "learning_rate": 1.9701309148087243e-05,
+      "loss": 1.0242,
+      "step": 215
+    },
+    {
+      "epoch": 2.27,
+      "learning_rate": 2.8098397211793345e-05,
+      "loss": 0.9276,
+      "step": 220
+    },
+    {
+      "epoch": 2.32,
+      "learning_rate": 3.755527380754418e-05,
+      "loss": 0.815,
+      "step": 225
+    },
+    {
+      "epoch": 2.37,
+      "learning_rate": 4.782448479340004e-05,
+      "loss": 0.9471,
+      "step": 230
+    },
+    {
+      "epoch": 2.42,
+      "learning_rate": 5.863732001255875e-05,
+      "loss": 0.937,
+      "step": 235
+    },
+    {
+      "epoch": 2.47,
+      "learning_rate": 6.97108445198519e-05,
+      "loss": 0.9469,
+      "step": 240
+    },
+    {
+      "epoch": 2.53,
+      "learning_rate": 8.075530202233491e-05,
+      "loss": 0.9792,
+      "step": 245
+    },
+    {
+      "epoch": 2.58,
+      "learning_rate": 9.148169681124647e-05,
+      "loss": 0.9752,
+      "step": 250
+    },
+    {
+      "epoch": 2.63,
+      "learning_rate": 0.00010160935579206932,
+      "loss": 1.0419,
+      "step": 255
+    },
+    {
+      "epoch": 2.68,
+      "learning_rate": 0.00011087327274022367,
+      "loss": 1.0001,
+      "step": 260
+    },
+    {
+      "epoch": 2.73,
+      "learning_rate": 0.00011903104260831395,
+      "loss": 0.9188,
+      "step": 265
+    },
+    {
+      "epoch": 2.78,
+      "learning_rate": 0.00012586920443780796,
+      "loss": 1.0406,
+      "step": 270
+    },
+    {
+      "epoch": 2.84,
+      "learning_rate": 0.0001312088269028256,
+      "loss": 0.9707,
+      "step": 275
+    },
+    {
+      "epoch": 2.89,
+      "learning_rate": 0.0001349101903314451,
+      "loss": 1.0249,
+      "step": 280
+    },
+    {
+      "epoch": 2.94,
+      "learning_rate": 0.0001368764426920297,
+      "loss": 1.0981,
+      "step": 285
+    },
+    {
+      "epoch": 2.99,
+      "learning_rate": 0.00013705613387990704,
+      "loss": 1.0821,
+      "step": 290
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.8179315328598022,
+      "eval_runtime": 7.3188,
+      "eval_samples_per_second": 21.178,
+      "eval_steps_per_second": 2.733,
+      "step": 291
     }
   ],
+  "max_steps": 291,
   "num_train_epochs": 3,
+  "total_flos": 302053588992000.0,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:640d62725592ddeb2633cb42cfa4872adce692e463f0c31e16d57bedfc0f8439
-size 2671

 version https://git-lfs.github.com/spec/v1
+oid sha256:96ee95e4c456b61288f96b2abd17f7901d4c06694edf41fdc6b197ab27bbeae6
+size 2863