AlekseyKorshuk
commited on
Commit
•
eaef62e
1
Parent(s):
0f9a51e
huggingartists
Browse files- README.md +3 -3
- evaluation.txt +1 -1
- flax_model.msgpack +1 -1
- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +184 -6
- training_args.bin +1 -1
README.md
CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
|
|
45 |
dataset = load_dataset("huggingartists/rihanna")
|
46 |
```
|
47 |
|
48 |
-
[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/
|
49 |
|
50 |
## Training procedure
|
51 |
|
52 |
The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Rihanna's lyrics.
|
53 |
|
54 |
-
Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/
|
55 |
|
56 |
-
At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/
|
57 |
|
58 |
## How to use
|
59 |
|
|
|
45 |
dataset = load_dataset("huggingartists/rihanna")
|
46 |
```
|
47 |
|
48 |
+
[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/ee6eogks/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
|
49 |
|
50 |
## Training procedure
|
51 |
|
52 |
The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Rihanna's lyrics.
|
53 |
|
54 |
+
Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1mvns7x8) for full transparency and reproducibility.
|
55 |
|
56 |
+
At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1mvns7x8/artifacts) is logged and versioned.
|
57 |
|
58 |
## How to use
|
59 |
|
evaluation.txt
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"eval_loss": 1.
|
|
|
1 |
+
{"eval_loss": 1.2033756971359253, "eval_runtime": 4.6245, "eval_samples_per_second": 39.572, "eval_steps_per_second": 4.974, "epoch": 7.0}
|
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 497764120
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91223b3a97f4c33bf4beb5f6f9bc0bdba4a1266033f550d62ad0677eacb6527e
|
3 |
size 497764120
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 995604017
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb346aee3195e920755952ca3f6d08dbae55ea133ff03b948e5e75f31a13e735
|
3 |
size 995604017
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 510396521
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:227d1a1eff8e256ae4ff6404aab3e581a87996e190fb70a13aee77a4b37e346f
|
3 |
size 510396521
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba49a0e1fbf48c63a7ae2a286d185ed913f3a9d7c46fb958fa2659acf47e391d
|
3 |
size 14503
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:880e20d26c7d0a4b78d5bba31d4851bd1223c0668a0dee28ba1cc31f27011705
|
3 |
size 623
|
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"best_metric": 1.
|
3 |
-
"best_model_checkpoint": "output/rihanna/checkpoint-
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -990,11 +990,189 @@
|
|
990 |
"eval_samples_per_second": 45.684,
|
991 |
"eval_steps_per_second": 5.887,
|
992 |
"step": 780
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
993 |
}
|
994 |
],
|
995 |
-
"max_steps":
|
996 |
"num_train_epochs": 7,
|
997 |
-
"total_flos":
|
998 |
"trial_name": null,
|
999 |
"trial_params": null
|
1000 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 1.2033756971359253,
|
3 |
+
"best_model_checkpoint": "output/rihanna/checkpoint-917",
|
4 |
+
"epoch": 7.0,
|
5 |
+
"global_step": 917,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
990 |
"eval_samples_per_second": 45.684,
|
991 |
"eval_steps_per_second": 5.887,
|
992 |
"step": 780
|
993 |
+
},
|
994 |
+
{
|
995 |
+
"epoch": 5.99,
|
996 |
+
"learning_rate": 0.00013718027438920657,
|
997 |
+
"loss": 1.3027,
|
998 |
+
"step": 785
|
999 |
+
},
|
1000 |
+
{
|
1001 |
+
"epoch": 6.0,
|
1002 |
+
"eval_loss": 1.2558701038360596,
|
1003 |
+
"eval_runtime": 3.8315,
|
1004 |
+
"eval_samples_per_second": 47.762,
|
1005 |
+
"eval_steps_per_second": 6.003,
|
1006 |
+
"step": 786
|
1007 |
+
},
|
1008 |
+
{
|
1009 |
+
"epoch": 6.03,
|
1010 |
+
"learning_rate": 0.0001368846170554219,
|
1011 |
+
"loss": 1.0307,
|
1012 |
+
"step": 790
|
1013 |
+
},
|
1014 |
+
{
|
1015 |
+
"epoch": 6.07,
|
1016 |
+
"learning_rate": 0.00013560834225858045,
|
1017 |
+
"loss": 1.3396,
|
1018 |
+
"step": 795
|
1019 |
+
},
|
1020 |
+
{
|
1021 |
+
"epoch": 6.11,
|
1022 |
+
"learning_rate": 0.0001333697782460593,
|
1023 |
+
"loss": 1.3763,
|
1024 |
+
"step": 800
|
1025 |
+
},
|
1026 |
+
{
|
1027 |
+
"epoch": 6.15,
|
1028 |
+
"learning_rate": 0.0001302010724480925,
|
1029 |
+
"loss": 1.3968,
|
1030 |
+
"step": 805
|
1031 |
+
},
|
1032 |
+
{
|
1033 |
+
"epoch": 6.18,
|
1034 |
+
"learning_rate": 0.00012614772981696314,
|
1035 |
+
"loss": 1.2598,
|
1036 |
+
"step": 810
|
1037 |
+
},
|
1038 |
+
{
|
1039 |
+
"epoch": 6.22,
|
1040 |
+
"learning_rate": 0.00012126795934232731,
|
1041 |
+
"loss": 1.6357,
|
1042 |
+
"step": 815
|
1043 |
+
},
|
1044 |
+
{
|
1045 |
+
"epoch": 6.26,
|
1046 |
+
"learning_rate": 0.00011563183812719099,
|
1047 |
+
"loss": 1.5069,
|
1048 |
+
"step": 820
|
1049 |
+
},
|
1050 |
+
{
|
1051 |
+
"epoch": 6.3,
|
1052 |
+
"learning_rate": 0.00010932030502902761,
|
1053 |
+
"loss": 1.3309,
|
1054 |
+
"step": 825
|
1055 |
+
},
|
1056 |
+
{
|
1057 |
+
"epoch": 6.34,
|
1058 |
+
"learning_rate": 0.00010242399831808912,
|
1059 |
+
"loss": 1.2963,
|
1060 |
+
"step": 830
|
1061 |
+
},
|
1062 |
+
{
|
1063 |
+
"epoch": 6.37,
|
1064 |
+
"learning_rate": 9.504195404499683e-05,
|
1065 |
+
"loss": 1.5269,
|
1066 |
+
"step": 835
|
1067 |
+
},
|
1068 |
+
{
|
1069 |
+
"epoch": 6.41,
|
1070 |
+
"learning_rate": 8.728018381001126e-05,
|
1071 |
+
"loss": 1.1606,
|
1072 |
+
"step": 840
|
1073 |
+
},
|
1074 |
+
{
|
1075 |
+
"epoch": 6.45,
|
1076 |
+
"learning_rate": 7.925015235826672e-05,
|
1077 |
+
"loss": 1.4031,
|
1078 |
+
"step": 845
|
1079 |
+
},
|
1080 |
+
{
|
1081 |
+
"epoch": 6.49,
|
1082 |
+
"learning_rate": 7.10671768638214e-05,
|
1083 |
+
"loss": 1.1422,
|
1084 |
+
"step": 850
|
1085 |
+
},
|
1086 |
+
{
|
1087 |
+
"epoch": 6.53,
|
1088 |
+
"learning_rate": 6.284877088998812e-05,
|
1089 |
+
"loss": 1.2245,
|
1090 |
+
"step": 855
|
1091 |
+
},
|
1092 |
+
{
|
1093 |
+
"epoch": 6.56,
|
1094 |
+
"learning_rate": 5.4712956807894146e-05,
|
1095 |
+
"loss": 1.5305,
|
1096 |
+
"step": 860
|
1097 |
+
},
|
1098 |
+
{
|
1099 |
+
"epoch": 6.6,
|
1100 |
+
"learning_rate": 4.677657090818787e-05,
|
1101 |
+
"loss": 1.4329,
|
1102 |
+
"step": 865
|
1103 |
+
},
|
1104 |
+
{
|
1105 |
+
"epoch": 6.64,
|
1106 |
+
"learning_rate": 3.915358554573355e-05,
|
1107 |
+
"loss": 1.0966,
|
1108 |
+
"step": 870
|
1109 |
+
},
|
1110 |
+
{
|
1111 |
+
"epoch": 6.68,
|
1112 |
+
"learning_rate": 3.1953472412537526e-05,
|
1113 |
+
"loss": 1.1648,
|
1114 |
+
"step": 875
|
1115 |
+
},
|
1116 |
+
{
|
1117 |
+
"epoch": 6.72,
|
1118 |
+
"learning_rate": 2.5279630443511272e-05,
|
1119 |
+
"loss": 1.5295,
|
1120 |
+
"step": 880
|
1121 |
+
},
|
1122 |
+
{
|
1123 |
+
"epoch": 6.76,
|
1124 |
+
"learning_rate": 1.9227900931507197e-05,
|
1125 |
+
"loss": 1.1119,
|
1126 |
+
"step": 885
|
1127 |
+
},
|
1128 |
+
{
|
1129 |
+
"epoch": 6.79,
|
1130 |
+
"learning_rate": 1.388519117566634e-05,
|
1131 |
+
"loss": 1.0333,
|
1132 |
+
"step": 890
|
1133 |
+
},
|
1134 |
+
{
|
1135 |
+
"epoch": 6.83,
|
1136 |
+
"learning_rate": 9.328226428505963e-06,
|
1137 |
+
"loss": 1.3515,
|
1138 |
+
"step": 895
|
1139 |
+
},
|
1140 |
+
{
|
1141 |
+
"epoch": 6.87,
|
1142 |
+
"learning_rate": 5.622448064706006e-06,
|
1143 |
+
"loss": 1.2529,
|
1144 |
+
"step": 900
|
1145 |
+
},
|
1146 |
+
{
|
1147 |
+
"epoch": 6.91,
|
1148 |
+
"learning_rate": 2.8210737947035045e-06,
|
1149 |
+
"loss": 1.0788,
|
1150 |
+
"step": 905
|
1151 |
+
},
|
1152 |
+
{
|
1153 |
+
"epoch": 6.95,
|
1154 |
+
"learning_rate": 9.643334191210031e-07,
|
1155 |
+
"loss": 1.4072,
|
1156 |
+
"step": 910
|
1157 |
+
},
|
1158 |
+
{
|
1159 |
+
"epoch": 6.98,
|
1160 |
+
"learning_rate": 7.88910991585708e-08,
|
1161 |
+
"loss": 0.9972,
|
1162 |
+
"step": 915
|
1163 |
+
},
|
1164 |
+
{
|
1165 |
+
"epoch": 7.0,
|
1166 |
+
"eval_loss": 1.2033756971359253,
|
1167 |
+
"eval_runtime": 4.6484,
|
1168 |
+
"eval_samples_per_second": 39.368,
|
1169 |
+
"eval_steps_per_second": 4.948,
|
1170 |
+
"step": 917
|
1171 |
}
|
1172 |
],
|
1173 |
+
"max_steps": 917,
|
1174 |
"num_train_epochs": 7,
|
1175 |
+
"total_flos": 953846562816000.0,
|
1176 |
"trial_name": null,
|
1177 |
"trial_params": null
|
1178 |
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3311
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81b335aaeaa4f7d2d36b33d05514125a74b0cc817463fa2fde25b4ec3e8b0a75
|
3 |
size 3311
|