AlekseyKorshuk committed on
Commit
b469c7e
1 Parent(s): d47dd19

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/the-gazette")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/1in06o8p/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on The Gazette's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/imffzt1z) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/imffzt1z/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
 
45
  dataset = load_dataset("huggingartists/the-gazette")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/3ck1sdfv/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on The Gazette's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/m1wevlws) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/m1wevlws/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "gpt2",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
 
1
  {
2
+ "_name_or_path": "huggingartists/the-gazette",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a0064ee8364449eae873b401e13781ea28885704a5e64dea29966eff9436880
3
  size 497764120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b0a55b9a88cfc006c97ced67a2e960eed339d9fa2f503a9ca72fc5049a4a9b4
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:041424f549c10466c0691914c673106c0cd7a3245b3e587210a5aeaf77492f42
3
  size 995603825
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:088ddca036c616cf924a3c31abfc7c02e28fc41869e9aec146ff68e468cc60dd
3
  size 995603825
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d4dc3d79af3942de48efe1cb4d7112a449e9de67e12eb7ce1bfe71d13d44133
3
  size 510403817
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a8c0f24236751e3b6e048e4a72eb41cd541006d398a0b7286bfce923c3667b9
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ada4d26d0f4f51eec3cea87150053cd5390ffe99beea24846e73e9800641a3f3
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bad83b8d103d4ac9f3bbe85b7ddfd36cfeb31fe3ed911b1bd4a2b3b2f43b8104
3
+ size 14567
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3759f0274d6462c99515ad4f587a39c595bfc6a862d6d53aec4d5f4c94343a76
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:407aaf946156cc59d13c99014290915b74fee0be008a71f0976b3bc407e23501
3
  size 623
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "gpt2", "tokenizer_class": "GPT2Tokenizer"}
 
1
+ {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/the-gazette", "tokenizer_class": "GPT2Tokenizer"}
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
- "global_step": 17,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -24,11 +24,71 @@
24
  "learning_rate": 4.6324050628611986e-06,
25
  "loss": 4.0138,
26
  "step": 15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  }
28
  ],
29
- "max_steps": 17,
30
- "num_train_epochs": 1,
31
- "total_flos": 16853336064000.0,
32
  "trial_name": null,
33
  "trial_params": null
34
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.0,
5
+ "global_step": 68,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
24
  "learning_rate": 4.6324050628611986e-06,
25
  "loss": 4.0138,
26
  "step": 15
27
+ },
28
+ {
29
+ "epoch": 1.18,
30
+ "learning_rate": 1.0275104488948473e-05,
31
+ "loss": 3.7661,
32
+ "step": 20
33
+ },
34
+ {
35
+ "epoch": 1.47,
36
+ "learning_rate": 6.227039054081748e-05,
37
+ "loss": 3.7248,
38
+ "step": 25
39
+ },
40
+ {
41
+ "epoch": 1.76,
42
+ "learning_rate": 0.00011929601172133719,
43
+ "loss": 3.9344,
44
+ "step": 30
45
+ },
46
+ {
47
+ "epoch": 2.06,
48
+ "learning_rate": 0.00013603195463831566,
49
+ "loss": 3.8787,
50
+ "step": 35
51
+ },
52
+ {
53
+ "epoch": 2.35,
54
+ "learning_rate": 9.917765120627052e-05,
55
+ "loss": 3.5252,
56
+ "step": 40
57
+ },
58
+ {
59
+ "epoch": 2.65,
60
+ "learning_rate": 3.8022348793729525e-05,
61
+ "loss": 3.5674,
62
+ "step": 45
63
+ },
64
+ {
65
+ "epoch": 2.94,
66
+ "learning_rate": 1.1680453616843376e-06,
67
+ "loss": 3.4848,
68
+ "step": 50
69
+ },
70
+ {
71
+ "epoch": 3.24,
72
+ "learning_rate": 1.7903988278662747e-05,
73
+ "loss": 3.2603,
74
+ "step": 55
75
+ },
76
+ {
77
+ "epoch": 3.53,
78
+ "learning_rate": 7.492960945918241e-05,
79
+ "loss": 3.4291,
80
+ "step": 60
81
+ },
82
+ {
83
+ "epoch": 3.82,
84
+ "learning_rate": 0.0001269248955110515,
85
+ "loss": 3.4645,
86
+ "step": 65
87
  }
88
  ],
89
+ "max_steps": 68,
90
+ "num_train_epochs": 4,
91
+ "total_flos": 67413344256000.0,
92
  "trial_name": null,
93
  "trial_params": null
94
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:481780d50f34750988b858ec38e71a7790325f55622dd09aa64618acc047e160
3
  size 2671
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:520a217df91c7cf215f9d566603b63d78168cc24e306e916bf9c950908c9a8f1
3
  size 2671