pritoms commited on
Commit
fe378e8
1 Parent(s): a952cf9
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ checkpoint-*/
README.md ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ tags:
4
+ - generated_from_trainer
5
+ datasets:
6
+ - null
7
+ model-index:
8
+ - name: gpt-neo-125M-finetuned-pgt
9
+ results:
10
+ - task:
11
+ name: Causal Language Modeling
12
+ type: text-generation
13
+ ---
14
+
15
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
16
+ should probably proofread and complete it, then remove this comment. -->
17
+
18
+ # gpt-neo-125M-finetuned-pgt
19
+
20
+ This model is a fine-tuned version of [EleutherAI/gpt-neo-125M](https://huggingface.co/EleutherAI/gpt-neo-125M) on the None dataset.
21
+ It achieves the following results on the evaluation set:
22
+ - Loss: 2.8697
23
+
24
+ ## Model description
25
+
26
+ More information needed
27
+
28
+ ## Intended uses & limitations
29
+
30
+ More information needed
31
+
32
+ ## Training and evaluation data
33
+
34
+ More information needed
35
+
36
+ ## Training procedure
37
+
38
+ ### Training hyperparameters
39
+
40
+ The following hyperparameters were used during training:
41
+ - learning_rate: 2e-05
42
+ - train_batch_size: 8
43
+ - eval_batch_size: 8
44
+ - seed: 42
45
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
46
+ - lr_scheduler_type: linear
47
+ - num_epochs: 3.0
48
+
49
+ ### Training results
50
+
51
+ | Training Loss | Epoch | Step | Validation Loss |
52
+ |:-------------:|:-----:|:----:|:---------------:|
53
+ | No log | 1.0 | 193 | 3.1904 |
54
+ | No log | 2.0 | 386 | 2.9441 |
55
+ | 3.2556 | 3.0 | 579 | 2.8697 |
56
+
57
+
58
+ ### Framework versions
59
+
60
+ - Transformers 4.10.0
61
+ - Pytorch 1.9.0+cu102
62
+ - Datasets 1.11.0
63
+ - Tokenizers 0.10.3
config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "EleutherAI/gpt-neo-125M",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPTNeoForCausalLM"
6
+ ],
7
+ "attention_dropout": 0,
8
+ "attention_layers": [
9
+ "global",
10
+ "local",
11
+ "global",
12
+ "local",
13
+ "global",
14
+ "local",
15
+ "global",
16
+ "local",
17
+ "global",
18
+ "local",
19
+ "global",
20
+ "local"
21
+ ],
22
+ "attention_types": [
23
+ [
24
+ [
25
+ "global",
26
+ "local"
27
+ ],
28
+ 6
29
+ ]
30
+ ],
31
+ "bos_token_id": 50256,
32
+ "embed_dropout": 0,
33
+ "eos_token_id": 50256,
34
+ "gradient_checkpointing": false,
35
+ "hidden_size": 768,
36
+ "initializer_range": 0.02,
37
+ "intermediate_size": null,
38
+ "layer_norm_epsilon": 1e-05,
39
+ "max_position_embeddings": 2048,
40
+ "model_type": "gpt_neo",
41
+ "num_heads": 12,
42
+ "num_layers": 12,
43
+ "resid_dropout": 0,
44
+ "summary_activation": null,
45
+ "summary_first_dropout": 0.1,
46
+ "summary_proj_to_labels": true,
47
+ "summary_type": "cls_index",
48
+ "summary_use_proj": true,
49
+ "torch_dtype": "float32",
50
+ "transformers_version": "4.10.0",
51
+ "use_cache": true,
52
+ "vocab_size": 50257,
53
+ "window_size": 256
54
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e3fa53644c442eec005c4832f829ae16410d188a6a506ee24aa09a9040104bf
3
+ size 526017245
runs/Sep05_14-36-30_7221f16f3f12/1630852596.8555255/events.out.tfevents.1630852596.7221f16f3f12.91.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b65aaf7918c34440443d16426b71869b19f823ccc1f7054d37dc03b2c95953f4
3
+ size 4187
runs/Sep05_14-36-30_7221f16f3f12/events.out.tfevents.1630852596.7221f16f3f12.91.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:342ab83531ce14916ac2fd0b5af1e736b24094b1307437a6d08fbf16d4ce1df5
3
+ size 4685
runs/Sep05_14-36-30_7221f16f3f12/events.out.tfevents.1630852809.7221f16f3f12.91.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d4a209f37f11ca038ec2ea1ad10f1bbeb00b50058af6ce53f4552c2275d84f0
3
+ size 311
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2ef92119ff3c039d295c19565faeecc374304d4d0a18e4050b5a89a58e92389
3
+ size 2671