Update for cnlpt v0.6.1. lr=1e-5, layer=12, bs=8

Browse files

Files changed (6) hide show

added_tokens.json +10 -1
config.json +62 -29
pytorch_model.bin +2 -2
special_tokens_map.json +17 -1
tokenizer.json +42 -28
tokenizer_config.json +26 -1

added_tokens.json CHANGED Viewed

	@@ -1 +1,10 @@
1	- {"</a2>": 28900, "</e>": 28896, "<a2>": 28899, "<cr>": 28901, "<a1>": 28897, "</a1>": 28898, "<e>": 28895, "<neg>": 28902}

+{
+  "</a1>": 28898,
+  "</a2>": 28900,
+  "</e>": 28896,
+  "<a1>": 28897,
+  "<a2>": 28899,
+  "<cr>": 28901,
+  "<e>": 28895,
+  "<neg>": 28902
+}

config.json CHANGED Viewed

@@ -2,6 +2,7 @@
   "architectures": [
     "CnlpModelForClassification"
   ],
   "encoder_config": {
     "_name_or_path": "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract",
     "add_cross_attention": false,
@@ -66,6 +67,7 @@
     "sep_token_id": null,
     "task_specific_params": null,
     "temperature": 1.0,
     "tie_encoder_decoder": false,
     "tie_word_embeddings": true,
     "tokenizer_class": null,
@@ -73,7 +75,7 @@
     "top_p": 1.0,
     "torch_dtype": null,
     "torchscript": false,
-    "transformers_version": "4.18.0",
     "type_vocab_size": 2,
     "typical_p": 1.0,
     "use_bfloat16": false,
@@ -81,42 +83,73 @@
     "vocab_size": 28903
   },
   "encoder_name": "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "layer": 12,
   "model_type": "cnlpt",
-  "num_labels_list": [
-    2,
-    2,
-    2,
-    2,
-    2,
-    2,
-    2
-  ],
   "num_rel_attention_heads": 12,
   "rel_attention_head_dims": 64,
-  "relations": [
-    false,
-    false,
-    false,
-    false,
-    false,
-    false,
-    false
-  ],
-  "tagger": [
-    false,
-    false,
-    false,
-    false,
-    false,
-    false,
-    false
-  ],
   "tokens": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.18.0",
   "use_prior_tasks": false,
   "vocab_size": 28903
 }

   "architectures": [
     "CnlpModelForClassification"
   ],
+  "cnlpt_version": "0.7.0",
   "encoder_config": {
     "_name_or_path": "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract",
     "add_cross_attention": false,
     "sep_token_id": null,
     "task_specific_params": null,
     "temperature": 1.0,
+    "tf_legacy_loss": false,
     "tie_encoder_decoder": false,
     "tie_word_embeddings": true,
     "tokenizer_class": null,
     "top_p": 1.0,
     "torch_dtype": null,
     "torchscript": false,
+    "transformers_version": "4.22.2",
     "type_vocab_size": 2,
     "typical_p": 1.0,
     "use_bfloat16": false,
     "vocab_size": 28903
   },
   "encoder_name": "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract",
+  "finetuning_task": [
+    "Process (Thought Process)",
+    "Content (Thought Content)",
+    "Mood",
+    "Substance",
+    "Interpersonal",
+    "Occupation",
+    "Appearance"
+  ],
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
+  "hier_head_config": null,
+  "label_dictionary": {
+    "Appearance": [
+      "No",
+      "Yes"
+    ],
+    "Content (Thought Content)": [
+      "No",
+      "Yes"
+    ],
+    "Interpersonal": [
+      "No",
+      "Yes"
+    ],
+    "Mood": [
+      "No",
+      "Yes"
+    ],
+    "Occupation": [
+      "No",
+      "Yes"
+    ],
+    "Process (Thought Process)": [
+      "No",
+      "Yes"
+    ],
+    "Substance": [
+      "No",
+      "Yes"
+    ]
+  },
   "layer": 12,
   "model_type": "cnlpt",
   "num_rel_attention_heads": 12,
   "rel_attention_head_dims": 64,
+  "relations": {
+    "Appearance": false,
+    "Content (Thought Content)": false,
+    "Interpersonal": false,
+    "Mood": false,
+    "Occupation": false,
+    "Process (Thought Process)": false,
+    "Substance": false
+  },
+  "tagger": {
+    "Appearance": false,
+    "Content (Thought Content)": false,
+    "Interpersonal": false,
+    "Mood": false,
+    "Occupation": false,
+    "Process (Thought Process)": false,
+    "Substance": false
+  },
   "tokens": false,
   "torch_dtype": "float32",
+  "transformers_version": "4.22.2",
   "use_prior_tasks": false,
   "vocab_size": 28903
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:69e4518295ba2aac6053ba04d7d24304903779bb4e701da8f81f3bd462cf4821
-size 449614969

 version https://git-lfs.github.com/spec/v1
+oid sha256:fdb229677a7f2a2f96e19876627157e6eddef8d833efa8e514efe9eaae05079a
+size 449617245

special_tokens_map.json CHANGED Viewed

	@@ -1 +1,17 @@
1	- {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "additional_special_tokens": ["<e>", "</e>", "<a1>", "</a1>", "<a2>", "</a2>", "<cr>", "<neg>"]}

+{
+  "additional_special_tokens": [
+    "<e>",
+    "</e>",
+    "<a1>",
+    "</a1>",
+    "<a2>",
+    "</a2>",
+    "<cr>",
+    "<neg>"
+  ],
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json CHANGED Viewed

@@ -1,124 +1,138 @@
 {
   "version": "1.0",
-  "truncation": null,
-  "padding": null,
   "added_tokens": [
     {
       "id": 0,
-      "special": true,
       "content": "[PAD]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
     },
     {
       "id": 1,
-      "special": true,
       "content": "[UNK]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
     },
     {
       "id": 2,
-      "special": true,
       "content": "[CLS]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
     },
     {
       "id": 3,
-      "special": true,
       "content": "[SEP]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
     },
     {
       "id": 4,
-      "special": true,
       "content": "[MASK]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
     },
     {
       "id": 28895,
-      "special": true,
       "content": "<e>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
     },
     {
       "id": 28896,
-      "special": true,
       "content": "</e>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
     },
     {
       "id": 28897,
-      "special": true,
       "content": "<a1>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
     },
     {
       "id": 28898,
-      "special": true,
       "content": "</a1>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
     },
     {
       "id": 28899,
-      "special": true,
       "content": "<a2>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
     },
     {
       "id": 28900,
-      "special": true,
       "content": "</a2>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
     },
     {
       "id": 28901,
-      "special": true,
       "content": "<cr>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
     },
     {
       "id": 28902,
-      "special": true,
       "content": "<neg>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
-      "normalized": false
     }
   ],
   "normalizer": {

 {
   "version": "1.0",
+  "truncation": {
+    "direction": "Right",
+    "max_length": 128,
+    "strategy": "LongestFirst",
+    "stride": 0
+  },
+  "padding": {
+    "strategy": {
+      "Fixed": 128
+    },
+    "direction": "Right",
+    "pad_to_multiple_of": null,
+    "pad_id": 0,
+    "pad_type_id": 0,
+    "pad_token": "[PAD]"
+  },
   "added_tokens": [
     {
       "id": 0,
       "content": "[PAD]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
+      "special": true
     },
     {
       "id": 1,
       "content": "[UNK]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
+      "special": true
     },
     {
       "id": 2,
       "content": "[CLS]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
+      "special": true
     },
     {
       "id": 3,
       "content": "[SEP]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
+      "special": true
     },
     {
       "id": 4,
       "content": "[MASK]",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
+      "special": true
     },
     {
       "id": 28895,
       "content": "<e>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
+      "special": true
     },
     {
       "id": 28896,
       "content": "</e>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
+      "special": true
     },
     {
       "id": 28897,
       "content": "<a1>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
+      "special": true
     },
     {
       "id": 28898,
       "content": "</a1>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
+      "special": true
     },
     {
       "id": 28899,
       "content": "<a2>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
+      "special": true
     },
     {
       "id": 28900,
       "content": "</a2>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
+      "special": true
     },
     {
       "id": 28901,
       "content": "<cr>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
+      "special": true
     },
     {
       "id": 28902,
       "content": "<neg>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,
+      "normalized": false,
+      "special": true
     }
   ],
   "normalizer": {

tokenizer_config.json CHANGED Viewed

	@@ -1 +1,26 @@
1	- {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "add_prefix_space": true, "additional_special_tokens": ["<e>", "</e>", "<a1>", "</a1>", "<a2>", "</a2>", "<cr>", "<neg>"], "special_tokens_map_file": null, "name_or_path": "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "BertTokenizer"}

+{
+  "add_prefix_space": true,
+  "additional_special_tokens": [
+    "<e>",
+    "</e>",
+    "<a1>",
+    "</a1>",
+    "<a2>",
+    "</a2>",
+    "<cr>",
+    "<neg>"
+  ],
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "name_or_path": "microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract",
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "special_tokens_map_file": null,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}