Norod78
/

distilgpt2-base-pretrained-he

@@ -1,4 +1,5 @@
 {
   "_num_labels": 1,
   "activation_function": "gelu_new",
   "architectures": [
@@ -24,6 +25,7 @@
   "n_inner": null,
   "n_layer": 6,
   "n_positions": 1024,
   "resid_pdrop": 0.1,
   "scale_attn_weights": true,
   "summary_activation": null,
@@ -37,6 +39,7 @@
       "max_length": 50
     }
   },
   "transformers_version": "4.9.0.dev0",
   "use_cache": true,
   "vocab_size": 50257

 {
+  "_name_or_path": "./distilgpt2-pretrained-he",
   "_num_labels": 1,
   "activation_function": "gelu_new",
   "architectures": [
   "n_inner": null,
   "n_layer": 6,
   "n_positions": 1024,
+  "pad_token_id": 50257,
   "resid_pdrop": 0.1,
   "scale_attn_weights": true,
   "summary_activation": null,
       "max_length": 50
     }
   },
+  "torch_dtype": "float32",
   "transformers_version": "4.9.0.dev0",
   "use_cache": true,
   "vocab_size": 50257

flax_model.msgpack → pytorch_model.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d0246ab09910a61fafb618e5b1e64abf1e8c8a88e6c984b3505a8aea1e936555
-size 248885212

 version https://git-lfs.github.com/spec/v1
+oid sha256:7853d0c6cbd54e6790f32280ed40bb1747b08ff855aeb4bf6d63e2b8e74ee52f
+size 333973553