huggingartists

Browse files

Files changed (13) hide show

README.md +4 -4
config.json +4 -2
evaluation.txt +1 -1
flax_model.msgpack +1 -1
optimizer.pt +1 -1
pytorch_model.bin +2 -2
rng_state.pth +1 -1
scheduler.pt +1 -1
special_tokens_map.json +5 -1
tokenizer.json +0 -0
tokenizer_config.json +10 -1
trainer_state.json +44 -6
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -14,7 +14,7 @@ widget:
 <div class="inline-flex flex-col" style="line-height: 1.5;">
     <div class="flex">
         <div
-			style="display:DISPLAY_1; margin-left: auto; margin-right: auto; width: 92px; height:92px; border-radius: 50%; background-size: cover; background-image: url(&#39;https://images.genius.com/e4f988f1ee26618c5dd41b59b8ff2b43.1000x1000x1.jpg&#39;)">
         </div>
     </div>
     <div style="text-align: center; margin-top: 3px; font-size: 16px; font-weight: 800">🤖 HuggingArtists Model 🤖</div>
@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/coldplay")
 ```
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/399heq03/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Coldplay's lyrics.
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/3o6fr9bq) for full transparency and reproducibility.
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/3o6fr9bq/artifacts) is logged and versioned.
 ## How to use

 <div class="inline-flex flex-col" style="line-height: 1.5;">
     <div class="flex">
         <div
+			style="display:DISPLAY_1; margin-left: auto; margin-right: auto; width: 92px; height:92px; border-radius: 50%; background-size: cover; background-image: url(&#39;https://images.genius.com/6cfcc2b1425286fe0d0b8c857c895b63.600x338x200.gif&#39;)">
         </div>
     </div>
     <div style="text-align: center; margin-top: 3px; font-size: 16px; font-weight: 800">🤖 HuggingArtists Model 🤖</div>
 dataset = load_dataset("huggingartists/coldplay")
 ```
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/34tqcy7u/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Coldplay's lyrics.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/23h7o09h) for full transparency and reproducibility.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/23h7o09h/artifacts) is logged and versioned.
 ## How to use

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "huggingartists/coldplay",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
@@ -18,7 +18,9 @@
   "n_inner": null,
   "n_layer": 12,
   "n_positions": 1024,
   "resid_pdrop": 0.1,
   "scale_attn_weights": true,
   "summary_activation": null,
   "summary_first_dropout": 0.1,
@@ -35,7 +37,7 @@
     }
   },
   "torch_dtype": "float32",
-  "transformers_version": "4.11.3",
   "use_cache": true,
   "vocab_size": 50257
 }

 {
+  "_name_or_path": "coldplay",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
   "n_inner": null,
   "n_layer": 12,
   "n_positions": 1024,
+  "reorder_and_upcast_attn": false,
   "resid_pdrop": 0.1,
+  "scale_attn_by_inverse_layer_idx": false,
   "scale_attn_weights": true,
   "summary_activation": null,
   "summary_first_dropout": 0.1,
     }
   },
   "torch_dtype": "float32",
+  "transformers_version": "4.20.1",
   "use_cache": true,
   "vocab_size": 50257
 }

evaluation.txt CHANGED Viewed

	@@ -1 +1 @@
1	- {"eval_loss": 1.2095028162002563, "eval_runtime": 3.4191, "eval_samples_per_second": 21.643, "eval_steps_per_second": 2.925, "epoch": 19.0}


1	+ {"eval_loss": 1.0933133363723755, "eval_runtime": 1.4407, "eval_samples_per_second": 38.87, "eval_steps_per_second": 4.859, "epoch": 12.0}

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8806c168019cbc1ae2ffed46c832caa9529a17675216fdd1c782cc1176e7331
 size 497764120

 version https://git-lfs.github.com/spec/v1
+oid sha256:a968012daa02f2aeede1b08301d9d8cdf0b0a0b66138b34e01d21479815aa9d6
 size 497764120

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37ac05510aa6c132c3344e1ec9d0f1e37c7031d848f40767d745da3aa0e66a9b
 size 995604017

 version https://git-lfs.github.com/spec/v1
+oid sha256:2a49be6f0dace143bae78e031981d54f03c5f1f75566034d46b4851b17b2efca
 size 995604017

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0a7158419658b030a3d8570b711350a55f73d17342aa9920b8e41aff9c03d4a1
-size 510403817

 version https://git-lfs.github.com/spec/v1
+oid sha256:cfc5669114394eec4c7fd9808510a62c68422cd295b0720550d9742974be536f
+size 510396521

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1096f6f012654cb7b05c9d15f0c8c81dff6f30d1dbde8cf0b9489e06f1331a53
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:3837ecb65e024d5d16e99eee7628f55aed7ce556e170bd1fa6cd6486cd8c4b50
 size 14503

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b9430296501fbe754e360495552f3a391e4c1ba2be1a491d6867af0670e74711
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:1bc86d1066eb2b98a95a6b7ab12eba7384232826b3650f7109aba3e265fc996a
 size 623

special_tokens_map.json CHANGED Viewed

	@@ -1 +1,5 @@
1	- {"bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "unk_token": "<|endoftext|>"}

+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json CHANGED Viewed

	@@ -1 +1,10 @@
1	- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/coldplay", "tokenizer_class": "GPT2Tokenizer"}

+{
+  "add_prefix_space": false,
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "model_max_length": 1024,
+  "name_or_path": "huggingartists/coldplay",
+  "special_tokens_map_file": null,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}

trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-  "best_metric": 1.2095028162002563,
-  "best_model_checkpoint": "output/coldplay/checkpoint-528",
   "epoch": 11.0,
-  "global_step": 528,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -732,11 +732,49 @@
       "eval_samples_per_second": 22.445,
       "eval_steps_per_second": 3.033,
       "step": 528
     }
   ],
-  "max_steps": 912,
-  "num_train_epochs": 19,
-  "total_flos": 549235851264000.0,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 1.0933133363723755,
+  "best_model_checkpoint": "output/coldplay/checkpoint-550",
   "epoch": 11.0,
+  "global_step": 550,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 22.445,
       "eval_steps_per_second": 3.033,
       "step": 528
+    },
+    {
+      "epoch": 10.6,
+      "learning_rate": 4.7401434185878923e-05,
+      "loss": 1.0609,
+      "step": 530
+    },
+    {
+      "epoch": 10.7,
+      "learning_rate": 2.8277931692736518e-05,
+      "loss": 1.2072,
+      "step": 535
+    },
+    {
+      "epoch": 10.8,
+      "learning_rate": 1.3101434185878659e-05,
+      "loss": 1.2221,
+      "step": 540
+    },
+    {
+      "epoch": 10.9,
+      "learning_rate": 3.3575229821524526e-06,
+      "loss": 1.0503,
+      "step": 545
+    },
+    {
+      "epoch": 11.0,
+      "learning_rate": 0.0,
+      "loss": 1.0194,
+      "step": 550
+    },
+    {
+      "epoch": 11.0,
+      "eval_loss": 1.0933133363723755,
+      "eval_runtime": 1.2839,
+      "eval_samples_per_second": 43.616,
+      "eval_steps_per_second": 5.452,
+      "step": 550
     }
   ],
+  "max_steps": 600,
+  "num_train_epochs": 12,
+  "total_flos": 572229550080000.0,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8634fa1cc14205deda566a1ce3471c09a00adb32b1ca2deb15e6602eb34457d2
-size 2863

 version https://git-lfs.github.com/spec/v1
+oid sha256:092ba382e6c8b2a6645125b1644c9103ebd83510860ac51077dcbeccd57b05f0
+size 3311