AlekseyKorshuk
committed on
Commit
•
ffe42be
1
Parent(s):
1a17623
huggingartists
Browse files
- README.md +4 -4
- config.json +4 -2
- evaluation.txt +1 -1
- flax_model.msgpack +1 -1
- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- tokenizer.json +0 -0
- trainer_state.json +87 -7
- training_args.bin +2 -2
README.md
CHANGED
@@ -14,7 +14,7 @@ widget:
|
|
14 |
<div class="inline-flex flex-col" style="line-height: 1.5;">
|
15 |
<div class="flex">
|
16 |
<div
|
17 |
-
style="display:DISPLAY_1; margin-left: auto; margin-right: auto; width: 92px; height:92px; border-radius: 50%; background-size: cover; background-image: url('https://images.genius.com/
|
18 |
</div>
|
19 |
</div>
|
20 |
<div style="text-align: center; margin-top: 3px; font-size: 16px; font-weight: 800">🤖 HuggingArtists Model 🤖</div>
|
@@ -45,15 +45,15 @@ from datasets import load_dataset
|
|
45 |
dataset = load_dataset("huggingartists/queen")
|
46 |
```
|
47 |
|
48 |
-
[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/
|
49 |
|
50 |
## Training procedure
|
51 |
|
52 |
The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Queen's lyrics.
|
53 |
|
54 |
-
Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/
|
55 |
|
56 |
-
At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/
|
57 |
|
58 |
## How to use
|
59 |
|
|
|
14 |
<div class="inline-flex flex-col" style="line-height: 1.5;">
|
15 |
<div class="flex">
|
16 |
<div
|
17 |
+
style="display:DISPLAY_1; margin-left: auto; margin-right: auto; width: 92px; height:92px; border-radius: 50%; background-size: cover; background-image: url('https://images.genius.com/97bcb5755cb9780d76b37726a0ce4bef.1000x1000x1.jpg')">
|
18 |
</div>
|
19 |
</div>
|
20 |
<div style="text-align: center; margin-top: 3px; font-size: 16px; font-weight: 800">🤖 HuggingArtists Model 🤖</div>
|
|
|
45 |
dataset = load_dataset("huggingartists/queen")
|
46 |
```
|
47 |
|
48 |
+
[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/1ddcmutf/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
|
49 |
|
50 |
## Training procedure
|
51 |
|
52 |
The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Queen's lyrics.
|
53 |
|
54 |
+
Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1dhu8z5q) for full transparency and reproducibility.
|
55 |
|
56 |
+
At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1dhu8z5q/artifacts) is logged and versioned.
|
57 |
|
58 |
## How to use
|
59 |
|
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"activation_function": "gelu_new",
|
4 |
"architectures": [
|
5 |
"GPT2LMHeadModel"
|
@@ -18,7 +18,9 @@
|
|
18 |
"n_inner": null,
|
19 |
"n_layer": 12,
|
20 |
"n_positions": 1024,
|
|
|
21 |
"resid_pdrop": 0.1,
|
|
|
22 |
"scale_attn_weights": true,
|
23 |
"summary_activation": null,
|
24 |
"summary_first_dropout": 0.1,
|
@@ -35,7 +37,7 @@
|
|
35 |
}
|
36 |
},
|
37 |
"torch_dtype": "float32",
|
38 |
-
"transformers_version": "4.
|
39 |
"use_cache": true,
|
40 |
"vocab_size": 50257
|
41 |
}
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "queen",
|
3 |
"activation_function": "gelu_new",
|
4 |
"architectures": [
|
5 |
"GPT2LMHeadModel"
|
|
|
18 |
"n_inner": null,
|
19 |
"n_layer": 12,
|
20 |
"n_positions": 1024,
|
21 |
+
"reorder_and_upcast_attn": false,
|
22 |
"resid_pdrop": 0.1,
|
23 |
+
"scale_attn_by_inverse_layer_idx": false,
|
24 |
"scale_attn_weights": true,
|
25 |
"summary_activation": null,
|
26 |
"summary_first_dropout": 0.1,
|
|
|
37 |
}
|
38 |
},
|
39 |
"torch_dtype": "float32",
|
40 |
+
"transformers_version": "4.16.2",
|
41 |
"use_cache": true,
|
42 |
"vocab_size": 50257
|
43 |
}
|
evaluation.txt
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"eval_loss": 1.
|
|
|
1 |
+
{"eval_loss": 1.0593422651290894, "eval_runtime": 1.2351, "eval_samples_per_second": 74.489, "eval_steps_per_second": 9.716, "epoch": 10.0}
|
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 497764120
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f71c5d51cdf52a930cd7e68b3903da1eb422d09bd06f91080d5d80355a54ba3c
|
3 |
size 497764120
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 995604017
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b8a2f7accefebd6dee4b8dea20bafd47f1f9acf8efc62324b4a2287db757050
|
3 |
size 995604017
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 510403817
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d37c527c1768f42033ebf3f95e514029cb59df6fe857c02fe5b6c2968b0e7dd
|
3 |
size 510403817
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f49bf0f2acadddffcdc0a4e3487fd525abbe1771468cbd89c3052c1ed8d61d77
|
3 |
size 14503
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90d90db3c01e5ca255a7e75446ed55bd62e0e6b8c2052a1a18ccb2f8838ed976
|
3 |
size 623
|
tokenizer.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"best_metric": 1.
|
3 |
-
"best_model_checkpoint": "output/queen/checkpoint-
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -830,11 +830,91 @@
|
|
830 |
"eval_samples_per_second": 22.699,
|
831 |
"eval_steps_per_second": 2.87,
|
832 |
"step": 621
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
833 |
}
|
834 |
],
|
835 |
-
"max_steps":
|
836 |
-
"num_train_epochs":
|
837 |
-
"total_flos":
|
838 |
"trial_name": null,
|
839 |
"trial_params": null
|
840 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 1.0593422651290894,
|
3 |
+
"best_model_checkpoint": "output/queen/checkpoint-680",
|
4 |
+
"epoch": 10.0,
|
5 |
+
"global_step": 680,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
830 |
"eval_samples_per_second": 22.699,
|
831 |
"eval_steps_per_second": 2.87,
|
832 |
"step": 621
|
833 |
+
},
|
834 |
+
{
|
835 |
+
"epoch": 9.19,
|
836 |
+
"learning_rate": 1.2005173242556402e-05,
|
837 |
+
"loss": 1.3691,
|
838 |
+
"step": 625
|
839 |
+
},
|
840 |
+
{
|
841 |
+
"epoch": 9.26,
|
842 |
+
"learning_rate": 2.2384478845846314e-05,
|
843 |
+
"loss": 1.5005,
|
844 |
+
"step": 630
|
845 |
+
},
|
846 |
+
{
|
847 |
+
"epoch": 9.34,
|
848 |
+
"learning_rate": 3.5218932770313436e-05,
|
849 |
+
"loss": 1.4065,
|
850 |
+
"step": 635
|
851 |
+
},
|
852 |
+
{
|
853 |
+
"epoch": 9.41,
|
854 |
+
"learning_rate": 4.982671888105515e-05,
|
855 |
+
"loss": 1.3225,
|
856 |
+
"step": 640
|
857 |
+
},
|
858 |
+
{
|
859 |
+
"epoch": 9.49,
|
860 |
+
"learning_rate": 6.543181473690211e-05,
|
861 |
+
"loss": 1.0367,
|
862 |
+
"step": 645
|
863 |
+
},
|
864 |
+
{
|
865 |
+
"epoch": 9.56,
|
866 |
+
"learning_rate": 8.120521692221662e-05,
|
867 |
+
"loss": 1.1784,
|
868 |
+
"step": 650
|
869 |
+
},
|
870 |
+
{
|
871 |
+
"epoch": 9.63,
|
872 |
+
"learning_rate": 9.630898093421192e-05,
|
873 |
+
"loss": 1.2778,
|
874 |
+
"step": 655
|
875 |
+
},
|
876 |
+
{
|
877 |
+
"epoch": 9.71,
|
878 |
+
"learning_rate": 0.00010994073605561679,
|
879 |
+
"loss": 1.3754,
|
880 |
+
"step": 660
|
881 |
+
},
|
882 |
+
{
|
883 |
+
"epoch": 9.78,
|
884 |
+
"learning_rate": 0.00012137631040942535,
|
885 |
+
"loss": 1.1983,
|
886 |
+
"step": 665
|
887 |
+
},
|
888 |
+
{
|
889 |
+
"epoch": 9.85,
|
890 |
+
"learning_rate": 0.00013000820178695732,
|
891 |
+
"loss": 1.3077,
|
892 |
+
"step": 670
|
893 |
+
},
|
894 |
+
{
|
895 |
+
"epoch": 9.93,
|
896 |
+
"learning_rate": 0.0001353778505291435,
|
897 |
+
"loss": 1.4969,
|
898 |
+
"step": 675
|
899 |
+
},
|
900 |
+
{
|
901 |
+
"epoch": 10.0,
|
902 |
+
"learning_rate": 0.0001372,
|
903 |
+
"loss": 1.3263,
|
904 |
+
"step": 680
|
905 |
+
},
|
906 |
+
{
|
907 |
+
"epoch": 10.0,
|
908 |
+
"eval_loss": 1.0593422651290894,
|
909 |
+
"eval_runtime": 1.2161,
|
910 |
+
"eval_samples_per_second": 75.649,
|
911 |
+
"eval_steps_per_second": 9.867,
|
912 |
+
"step": 680
|
913 |
}
|
914 |
],
|
915 |
+
"max_steps": 680,
|
916 |
+
"num_train_epochs": 10,
|
917 |
+
"total_flos": 704182026240000.0,
|
918 |
"trial_name": null,
|
919 |
"trial_params": null
|
920 |
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9bfe077b70ca57958721fcbf6c5a404f931dd10af1a32aa26767d45cbe093bfa
|
3 |
+
size 3055
|