add model

Files changed (7)

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-license: apache-2.0
 tags:
 - generated_from_trainer
 datasets:
@@ -17,9 +17,9 @@ should probably proofread and complete it, then remove this comment. -->
 # gpt-neo-125M-Byethon
-This model is a fine-tuned version of [EleutherAI/gpt-neo-125M](https://huggingface.co/EleutherAI/gpt-neo-125M) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.6258
 ## Model description
@@ -50,9 +50,9 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| No log        | 1.0   | 31   | 0.8309          |
-| No log        | 2.0   | 62   | 0.6676          |
-| No log        | 3.0   | 93   | 0.6258          |
 ### Framework versions

 ---
+license: mit
 tags:
 - generated_from_trainer
 datasets:
 # gpt-neo-125M-Byethon
+This model is a fine-tuned version of [Sentdex/GPyT](https://huggingface.co/Sentdex/GPyT) on an unspecified dataset.
 It achieves the following results on the evaluation set:
+- Loss: 1.6310
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| No log        | 1.0   | 31   | 2.6150          |
+| No log        | 2.0   | 62   | 1.8266          |
+| No log        | 3.0   | 93   | 1.6310          |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,46 +1,25 @@
 {
-  "_name_or_path": "EleutherAI/gpt-neo-125M",
   "activation_function": "gelu_new",
   "architectures": [
-    "GPTNeoForCausalLM"
   ],
-  "attention_dropout": 0,
-  "attention_layers": [
-    "global",
-    "local",
-    "global",
-    "local",
-    "global",
-    "local",
-    "global",
-    "local",
-    "global",
-    "local",
-    "global",
-    "local"
-  ],
-  "attention_types": [
-    [
-      [
-        "global",
-        "local"
-      ],
-      6
-    ]
-  ],
-  "bos_token_id": 50256,
-  "embed_dropout": 0,
-  "eos_token_id": 50256,
   "gradient_checkpointing": false,
-  "hidden_size": 768,
   "initializer_range": 0.02,
-  "intermediate_size": null,
   "layer_norm_epsilon": 1e-05,
-  "max_position_embeddings": 2048,
-  "model_type": "gpt_neo",
-  "num_heads": 12,
-  "num_layers": 12,
-  "resid_dropout": 0,
   "summary_activation": null,
   "summary_first_dropout": 0.1,
   "summary_proj_to_labels": true,
@@ -49,6 +28,5 @@
   "torch_dtype": "float32",
   "transformers_version": "4.10.0",
   "use_cache": true,
-  "vocab_size": 50257,
-  "window_size": 256
 }

 {
+  "_name_or_path": "Sentdex/GPyT",
   "activation_function": "gelu_new",
   "architectures": [
+    "GPT2LMHeadModel"
   ],
+  "attn_pdrop": 0.1,
+  "bos_token_id": 0,
+  "embd_pdrop": 0.1,
+  "eos_token_id": 2,
   "gradient_checkpointing": false,
   "initializer_range": 0.02,
   "layer_norm_epsilon": 1e-05,
+  "model_type": "gpt2",
+  "n_ctx": 1024,
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": null,
+  "n_layer": 12,
+  "n_positions": 1024,
+  "resid_pdrop": 0.1,
+  "scale_attn_weights": true,
   "summary_activation": null,
   "summary_first_dropout": 0.1,
   "summary_proj_to_labels": true,
   "torch_dtype": "float32",
   "transformers_version": "4.10.0",
   "use_cache": true,
+  "vocab_size": 52000
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a947f7279860c629884849659ee37d434ece54cfc6a62e041aaa3d8d90ce0ba0
-size 526017245

 version https://git-lfs.github.com/spec/v1
+oid sha256:db8ada5d04a75b73895b09ad0480668c25cf2b932efb03a58892523022d486cc
+size 515758313

runs/Sep07_20-19-12_4519ca7e7339/1631045956.9950128/events.out.tfevents.1631045956.4519ca7e7339.87.4 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:834c0e3a5d71bdcddd9ff02433829d5e0a16b946d5a4024cd34309e016f5d188
+size 4181

runs/Sep07_20-19-12_4519ca7e7339/events.out.tfevents.1631045956.4519ca7e7339.87.3 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:f470f9d42b68219266fe8ff4b3749079052a30b5801d29b9e221a762d368869a
+size 4174

runs/Sep07_20-19-12_4519ca7e7339/events.out.tfevents.1631046110.4519ca7e7339.87.5 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:470099022faa808d3c57903d82443b88441991469f9493661ddfcd96ae7e0aab
+size 306

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba53c8a4d78e42c88f4b3ab1fd0dc5f6e3b1647b30dbc2fccee5e497bc284012
 size 2671

 version https://git-lfs.github.com/spec/v1
+oid sha256:c4c12a69a8c8ca3816ecfaed1c985a39a507271f90d0aaca8329049e442a6e7e
 size 2671