KoichiYasuoka's picture
initial release
b86332a
{
"architectures": [
"ModernBertForTokenClassification"
],
"attention_bias": false,
"attention_dropout": 0.0,
"auto_map": {
"AutoConfig": "configuration_modernbert.ModernBertConfig",
"AutoModel": "modeling_modernbert.ModernBertModel",
"AutoModelForMaskedLM": "modeling_modernbert.ModernBertForMaskedLM",
"AutoModelForSequenceClassification": "modeling_modernbert.ModernBertForSequenceClassification",
"AutoModelForTokenClassification": "modeling_modernbert.ModernBertForTokenClassification"
},
"bos_token_id": 0,
"classifier_activation": "gelu",
"classifier_bias": false,
"classifier_dropout": 0.0,
"classifier_pooling": "mean",
"cls_token_id": 0,
"custom_pipelines": {
"upos": {
"impl": "ud.BellmanFordTokenClassificationPipeline",
"pt": "AutoModelForTokenClassification"
},
"universal-dependencies": {
"impl": "ud.UniversalDependenciesPipeline",
"pt": "AutoModelForTokenClassification"
}
},
"decoder_bias": true,
"deterministic_flash_attn": false,
"embedding_dropout": 0.0,
"eos_token_id": 2,
"global_attn_every_n_layers": 3,
"global_rope_theta": 160000.0,
"gradient_checkpointing": false,
"hidden_activation": "gelu",
"hidden_size": 768,
"id2label": {
"0": "ADJ",
"1": "ADJ.",
"2": "ADJ|_",
"3": "ADJ|l-acl",
"4": "ADJ|l-advcl",
"5": "ADJ|l-amod",
"6": "ADJ|l-ccomp",
"7": "ADJ|l-csubj",
"8": "ADJ|l-csubj:outer",
"9": "ADJ|l-nmod",
"10": "ADJ|l-nsubj",
"11": "ADJ|l-obj",
"12": "ADJ|l-obl",
"13": "ADJ|r-acl",
"14": "ADJ|r-amod",
"15": "ADJ|r-dep",
"16": "ADJ|root",
"17": "ADP",
"18": "ADP.",
"19": "ADP|_",
"20": "ADP|l-case",
"21": "ADP|r-case",
"22": "ADP|r-fixed",
"23": "ADV",
"24": "ADV.",
"25": "ADV|_",
"26": "ADV|l-advcl",
"27": "ADV|l-advmod",
"28": "ADV|l-obj",
"29": "ADV|r-dep",
"30": "ADV|root",
"31": "AUX",
"32": "AUX.",
"33": "AUX|Polarity=Neg|_",
"34": "AUX|Polarity=Neg|r-aux",
"35": "AUX|Polarity=Neg|r-fixed",
"36": "AUX|_",
"37": "AUX|r-aux",
"38": "AUX|r-cop",
"39": "AUX|r-fixed",
"40": "AUX|root",
"41": "B-ADJ",
"42": "B-ADJ.",
"43": "B-ADP",
"44": "B-ADP.",
"45": "B-ADV",
"46": "B-ADV.",
"47": "B-AUX",
"48": "B-AUX.",
"49": "B-CCONJ",
"50": "B-CCONJ.",
"51": "B-DET",
"52": "B-DET.",
"53": "B-INTJ",
"54": "B-INTJ.",
"55": "B-NOUN",
"56": "B-NOUN.",
"57": "B-NUM",
"58": "B-NUM.",
"59": "B-PART",
"60": "B-PART.",
"61": "B-PRON",
"62": "B-PRON.",
"63": "B-PROPN",
"64": "B-PROPN.",
"65": "B-PUNCT",
"66": "B-PUNCT.",
"67": "B-SCONJ",
"68": "B-SCONJ.",
"69": "B-SYM",
"70": "B-SYM.",
"71": "B-VERB",
"72": "B-VERB.",
"73": "B-X",
"74": "B-X.",
"75": "CCONJ",
"76": "CCONJ.",
"77": "CCONJ|_",
"78": "CCONJ|l-cc",
"79": "CCONJ|r-cc",
"80": "DET",
"81": "DET.",
"82": "DET|_",
"83": "DET|l-det",
"84": "I-ADJ",
"85": "I-ADJ.",
"86": "I-ADP",
"87": "I-ADP.",
"88": "I-ADV",
"89": "I-ADV.",
"90": "I-AUX",
"91": "I-AUX.",
"92": "I-CCONJ",
"93": "I-CCONJ.",
"94": "I-DET",
"95": "I-DET.",
"96": "I-INTJ",
"97": "I-INTJ.",
"98": "I-NOUN",
"99": "I-NOUN.",
"100": "I-NUM",
"101": "I-NUM.",
"102": "I-PART",
"103": "I-PART.",
"104": "I-PRON",
"105": "I-PRON.",
"106": "I-PROPN",
"107": "I-PROPN.",
"108": "I-PUNCT",
"109": "I-PUNCT.",
"110": "I-SCONJ",
"111": "I-SCONJ.",
"112": "I-SYM",
"113": "I-SYM.",
"114": "I-VERB",
"115": "I-VERB.",
"116": "I-X",
"117": "I-X.",
"118": "INTJ",
"119": "INTJ.",
"120": "INTJ|_",
"121": "INTJ|l-discourse",
"122": "INTJ|r-discourse",
"123": "INTJ|root",
"124": "NOUN",
"125": "NOUN.",
"126": "NOUN|Polarity=Neg|_",
"127": "NOUN|Polarity=Neg|l-obl",
"128": "NOUN|Polarity=Neg|root",
"129": "NOUN|_",
"130": "NOUN|l-acl",
"131": "NOUN|l-advcl",
"132": "NOUN|l-ccomp",
"133": "NOUN|l-compound",
"134": "NOUN|l-csubj",
"135": "NOUN|l-csubj:outer",
"136": "NOUN|l-nmod",
"137": "NOUN|l-nsubj",
"138": "NOUN|l-nsubj:outer",
"139": "NOUN|l-obj",
"140": "NOUN|l-obl",
"141": "NOUN|r-compound",
"142": "NOUN|r-nmod",
"143": "NOUN|r-nsubj",
"144": "NOUN|root",
"145": "NUM",
"146": "NUM.",
"147": "NUM|_",
"148": "NUM|l-advcl",
"149": "NUM|l-compound",
"150": "NUM|l-nmod",
"151": "NUM|l-nsubj",
"152": "NUM|l-nsubj:outer",
"153": "NUM|l-nummod",
"154": "NUM|l-obj",
"155": "NUM|l-obl",
"156": "NUM|r-compound",
"157": "NUM|root",
"158": "PART",
"159": "PART.",
"160": "PART|_",
"161": "PART|l-mark",
"162": "PART|r-mark",
"163": "PRON",
"164": "PRON.",
"165": "PRON|_",
"166": "PRON|l-acl",
"167": "PRON|l-advcl",
"168": "PRON|l-nmod",
"169": "PRON|l-nsubj",
"170": "PRON|l-nsubj:outer",
"171": "PRON|l-obj",
"172": "PRON|l-obl",
"173": "PRON|root",
"174": "PROPN",
"175": "PROPN.",
"176": "PROPN|_",
"177": "PROPN|l-acl",
"178": "PROPN|l-advcl",
"179": "PROPN|l-compound",
"180": "PROPN|l-nmod",
"181": "PROPN|l-nsubj",
"182": "PROPN|l-nsubj:outer",
"183": "PROPN|l-obj",
"184": "PROPN|l-obl",
"185": "PROPN|r-compound",
"186": "PROPN|r-nmod",
"187": "PROPN|root",
"188": "PUNCT",
"189": "PUNCT.",
"190": "PUNCT|_",
"191": "PUNCT|l-punct",
"192": "PUNCT|r-punct",
"193": "SCONJ",
"194": "SCONJ.",
"195": "SCONJ|_",
"196": "SCONJ|l-dep",
"197": "SCONJ|r-fixed",
"198": "SCONJ|r-mark",
"199": "SYM",
"200": "SYM.",
"201": "SYM|_",
"202": "SYM|l-compound",
"203": "SYM|l-dep",
"204": "SYM|l-nmod",
"205": "SYM|l-obl",
"206": "SYM|r-compound",
"207": "SYM|r-dep",
"208": "VERB",
"209": "VERB.",
"210": "VERB|_",
"211": "VERB|l-acl",
"212": "VERB|l-advcl",
"213": "VERB|l-ccomp",
"214": "VERB|l-compound",
"215": "VERB|l-csubj",
"216": "VERB|l-csubj:outer",
"217": "VERB|l-nmod",
"218": "VERB|l-obj",
"219": "VERB|l-obl",
"220": "VERB|r-acl",
"221": "VERB|r-advcl",
"222": "VERB|r-compound",
"223": "VERB|root",
"224": "X",
"225": "X.",
"226": "X|_",
"227": "X|l-nmod",
"228": "X|r-dep"
},
"initializer_cutoff_factor": 2.0,
"initializer_range": 0.02,
"intermediate_size": 1152,
"label2id": {
"ADJ": 0,
"ADJ.": 1,
"ADJ|_": 2,
"ADJ|l-acl": 3,
"ADJ|l-advcl": 4,
"ADJ|l-amod": 5,
"ADJ|l-ccomp": 6,
"ADJ|l-csubj": 7,
"ADJ|l-csubj:outer": 8,
"ADJ|l-nmod": 9,
"ADJ|l-nsubj": 10,
"ADJ|l-obj": 11,
"ADJ|l-obl": 12,
"ADJ|r-acl": 13,
"ADJ|r-amod": 14,
"ADJ|r-dep": 15,
"ADJ|root": 16,
"ADP": 17,
"ADP.": 18,
"ADP|_": 19,
"ADP|l-case": 20,
"ADP|r-case": 21,
"ADP|r-fixed": 22,
"ADV": 23,
"ADV.": 24,
"ADV|_": 25,
"ADV|l-advcl": 26,
"ADV|l-advmod": 27,
"ADV|l-obj": 28,
"ADV|r-dep": 29,
"ADV|root": 30,
"AUX": 31,
"AUX.": 32,
"AUX|Polarity=Neg|_": 33,
"AUX|Polarity=Neg|r-aux": 34,
"AUX|Polarity=Neg|r-fixed": 35,
"AUX|_": 36,
"AUX|r-aux": 37,
"AUX|r-cop": 38,
"AUX|r-fixed": 39,
"AUX|root": 40,
"B-ADJ": 41,
"B-ADJ.": 42,
"B-ADP": 43,
"B-ADP.": 44,
"B-ADV": 45,
"B-ADV.": 46,
"B-AUX": 47,
"B-AUX.": 48,
"B-CCONJ": 49,
"B-CCONJ.": 50,
"B-DET": 51,
"B-DET.": 52,
"B-INTJ": 53,
"B-INTJ.": 54,
"B-NOUN": 55,
"B-NOUN.": 56,
"B-NUM": 57,
"B-NUM.": 58,
"B-PART": 59,
"B-PART.": 60,
"B-PRON": 61,
"B-PRON.": 62,
"B-PROPN": 63,
"B-PROPN.": 64,
"B-PUNCT": 65,
"B-PUNCT.": 66,
"B-SCONJ": 67,
"B-SCONJ.": 68,
"B-SYM": 69,
"B-SYM.": 70,
"B-VERB": 71,
"B-VERB.": 72,
"B-X": 73,
"B-X.": 74,
"CCONJ": 75,
"CCONJ.": 76,
"CCONJ|_": 77,
"CCONJ|l-cc": 78,
"CCONJ|r-cc": 79,
"DET": 80,
"DET.": 81,
"DET|_": 82,
"DET|l-det": 83,
"I-ADJ": 84,
"I-ADJ.": 85,
"I-ADP": 86,
"I-ADP.": 87,
"I-ADV": 88,
"I-ADV.": 89,
"I-AUX": 90,
"I-AUX.": 91,
"I-CCONJ": 92,
"I-CCONJ.": 93,
"I-DET": 94,
"I-DET.": 95,
"I-INTJ": 96,
"I-INTJ.": 97,
"I-NOUN": 98,
"I-NOUN.": 99,
"I-NUM": 100,
"I-NUM.": 101,
"I-PART": 102,
"I-PART.": 103,
"I-PRON": 104,
"I-PRON.": 105,
"I-PROPN": 106,
"I-PROPN.": 107,
"I-PUNCT": 108,
"I-PUNCT.": 109,
"I-SCONJ": 110,
"I-SCONJ.": 111,
"I-SYM": 112,
"I-SYM.": 113,
"I-VERB": 114,
"I-VERB.": 115,
"I-X": 116,
"I-X.": 117,
"INTJ": 118,
"INTJ.": 119,
"INTJ|_": 120,
"INTJ|l-discourse": 121,
"INTJ|r-discourse": 122,
"INTJ|root": 123,
"NOUN": 124,
"NOUN.": 125,
"NOUN|Polarity=Neg|_": 126,
"NOUN|Polarity=Neg|l-obl": 127,
"NOUN|Polarity=Neg|root": 128,
"NOUN|_": 129,
"NOUN|l-acl": 130,
"NOUN|l-advcl": 131,
"NOUN|l-ccomp": 132,
"NOUN|l-compound": 133,
"NOUN|l-csubj": 134,
"NOUN|l-csubj:outer": 135,
"NOUN|l-nmod": 136,
"NOUN|l-nsubj": 137,
"NOUN|l-nsubj:outer": 138,
"NOUN|l-obj": 139,
"NOUN|l-obl": 140,
"NOUN|r-compound": 141,
"NOUN|r-nmod": 142,
"NOUN|r-nsubj": 143,
"NOUN|root": 144,
"NUM": 145,
"NUM.": 146,
"NUM|_": 147,
"NUM|l-advcl": 148,
"NUM|l-compound": 149,
"NUM|l-nmod": 150,
"NUM|l-nsubj": 151,
"NUM|l-nsubj:outer": 152,
"NUM|l-nummod": 153,
"NUM|l-obj": 154,
"NUM|l-obl": 155,
"NUM|r-compound": 156,
"NUM|root": 157,
"PART": 158,
"PART.": 159,
"PART|_": 160,
"PART|l-mark": 161,
"PART|r-mark": 162,
"PRON": 163,
"PRON.": 164,
"PRON|_": 165,
"PRON|l-acl": 166,
"PRON|l-advcl": 167,
"PRON|l-nmod": 168,
"PRON|l-nsubj": 169,
"PRON|l-nsubj:outer": 170,
"PRON|l-obj": 171,
"PRON|l-obl": 172,
"PRON|root": 173,
"PROPN": 174,
"PROPN.": 175,
"PROPN|_": 176,
"PROPN|l-acl": 177,
"PROPN|l-advcl": 178,
"PROPN|l-compound": 179,
"PROPN|l-nmod": 180,
"PROPN|l-nsubj": 181,
"PROPN|l-nsubj:outer": 182,
"PROPN|l-obj": 183,
"PROPN|l-obl": 184,
"PROPN|r-compound": 185,
"PROPN|r-nmod": 186,
"PROPN|root": 187,
"PUNCT": 188,
"PUNCT.": 189,
"PUNCT|_": 190,
"PUNCT|l-punct": 191,
"PUNCT|r-punct": 192,
"SCONJ": 193,
"SCONJ.": 194,
"SCONJ|_": 195,
"SCONJ|l-dep": 196,
"SCONJ|r-fixed": 197,
"SCONJ|r-mark": 198,
"SYM": 199,
"SYM.": 200,
"SYM|_": 201,
"SYM|l-compound": 202,
"SYM|l-dep": 203,
"SYM|l-nmod": 204,
"SYM|l-obl": 205,
"SYM|r-compound": 206,
"SYM|r-dep": 207,
"VERB": 208,
"VERB.": 209,
"VERB|_": 210,
"VERB|l-acl": 211,
"VERB|l-advcl": 212,
"VERB|l-ccomp": 213,
"VERB|l-compound": 214,
"VERB|l-csubj": 215,
"VERB|l-csubj:outer": 216,
"VERB|l-nmod": 217,
"VERB|l-obj": 218,
"VERB|l-obl": 219,
"VERB|r-acl": 220,
"VERB|r-advcl": 221,
"VERB|r-compound": 222,
"VERB|root": 223,
"X": 224,
"X.": 225,
"X|_": 226,
"X|l-nmod": 227,
"X|r-dep": 228
},
"layer_norm_eps": 1e-05,
"local_attention": 128,
"local_rope_theta": 10000.0,
"max_position_embeddings": 8192,
"mlp_bias": false,
"mlp_dropout": 0.0,
"model_type": "modernbert",
"norm_bias": false,
"norm_eps": 1e-05,
"num_attention_heads": 12,
"num_hidden_layers": 22,
"pad_token_id": 1,
"position_embedding_type": "absolute",
"reference_compile": true,
"repad_logits_with_grad": false,
"sep_token_id": 2,
"sparse_pred_ignore_index": -100,
"sparse_prediction": false,
"tokenizer_class": "DebertaV2TokenizerFast",
"torch_dtype": "float32",
"transformers_version": "4.47.1",
"vocab_size": 65000
}