{ "architectures": [ "ModernBertForTokenClassification" ], "attention_bias": false, "attention_dropout": 0.0, "auto_map": { "AutoConfig": "configuration_modernbert.ModernBertConfig", "AutoModel": "modeling_modernbert.ModernBertModel", "AutoModelForMaskedLM": "modeling_modernbert.ModernBertForMaskedLM", "AutoModelForSequenceClassification": "modeling_modernbert.ModernBertForSequenceClassification", "AutoModelForTokenClassification": "modeling_modernbert.ModernBertForTokenClassification" }, "bos_token_id": 0, "classifier_activation": "gelu", "classifier_bias": false, "classifier_dropout": 0.0, "classifier_pooling": "mean", "cls_token_id": 0, "custom_pipelines": { "upos": { "impl": "ud.BellmanFordTokenClassificationPipeline", "pt": "AutoModelForTokenClassification" }, "universal-dependencies": { "impl": "ud.UniversalDependenciesPipeline", "pt": "AutoModelForTokenClassification" } }, "decoder_bias": true, "deterministic_flash_attn": false, "embedding_dropout": 0.0, "eos_token_id": 2, "global_attn_every_n_layers": 3, "global_rope_theta": 160000.0, "gradient_checkpointing": false, "hidden_activation": "gelu", "hidden_size": 768, "id2label": { "0": "ADJ", "1": "ADJ.", "2": "ADJ|_", "3": "ADJ|l-acl", "4": "ADJ|l-advcl", "5": "ADJ|l-amod", "6": "ADJ|l-ccomp", "7": "ADJ|l-csubj", "8": "ADJ|l-csubj:outer", "9": "ADJ|l-nmod", "10": "ADJ|l-nsubj", "11": "ADJ|l-obj", "12": "ADJ|l-obl", "13": "ADJ|r-acl", "14": "ADJ|r-amod", "15": "ADJ|r-dep", "16": "ADJ|root", "17": "ADP", "18": "ADP.", "19": "ADP|_", "20": "ADP|l-case", "21": "ADP|r-case", "22": "ADP|r-fixed", "23": "ADV", "24": "ADV.", "25": "ADV|_", "26": "ADV|l-advcl", "27": "ADV|l-advmod", "28": "ADV|l-obj", "29": "ADV|r-dep", "30": "ADV|root", "31": "AUX", "32": "AUX.", "33": "AUX|Polarity=Neg|_", "34": "AUX|Polarity=Neg|r-aux", "35": "AUX|Polarity=Neg|r-fixed", "36": "AUX|_", "37": "AUX|r-aux", "38": "AUX|r-cop", "39": "AUX|r-fixed", "40": "AUX|root", "41": "B-ADJ", "42": "B-ADJ.", "43": "B-ADP", "44": "B-ADP.", "45": "B-ADV", "46": "B-ADV.", "47": "B-AUX", "48": "B-AUX.", "49": "B-CCONJ", "50": "B-CCONJ.", "51": "B-DET", "52": "B-DET.", "53": "B-INTJ", "54": "B-INTJ.", "55": "B-NOUN", "56": "B-NOUN.", "57": "B-NUM", "58": "B-NUM.", "59": "B-PART", "60": "B-PART.", "61": "B-PRON", "62": "B-PRON.", "63": "B-PROPN", "64": "B-PROPN.", "65": "B-PUNCT", "66": "B-PUNCT.", "67": "B-SCONJ", "68": "B-SCONJ.", "69": "B-SYM", "70": "B-SYM.", "71": "B-VERB", "72": "B-VERB.", "73": "B-X", "74": "B-X.", "75": "CCONJ", "76": "CCONJ.", "77": "CCONJ|_", "78": "CCONJ|l-cc", "79": "CCONJ|r-cc", "80": "DET", "81": "DET.", "82": "DET|_", "83": "DET|l-det", "84": "I-ADJ", "85": "I-ADJ.", "86": "I-ADP", "87": "I-ADP.", "88": "I-ADV", "89": "I-ADV.", "90": "I-AUX", "91": "I-AUX.", "92": "I-CCONJ", "93": "I-CCONJ.", "94": "I-DET", "95": "I-DET.", "96": "I-INTJ", "97": "I-INTJ.", "98": "I-NOUN", "99": "I-NOUN.", "100": "I-NUM", "101": "I-NUM.", "102": "I-PART", "103": "I-PART.", "104": "I-PRON", "105": "I-PRON.", "106": "I-PROPN", "107": "I-PROPN.", "108": "I-PUNCT", "109": "I-PUNCT.", "110": "I-SCONJ", "111": "I-SCONJ.", "112": "I-SYM", "113": "I-SYM.", "114": "I-VERB", "115": "I-VERB.", "116": "I-X", "117": "I-X.", "118": "INTJ", "119": "INTJ.", "120": "INTJ|_", "121": "INTJ|l-discourse", "122": "INTJ|r-discourse", "123": "INTJ|root", "124": "NOUN", "125": "NOUN.", "126": "NOUN|Polarity=Neg|_", "127": "NOUN|Polarity=Neg|l-obl", "128": "NOUN|Polarity=Neg|root", "129": "NOUN|_", "130": "NOUN|l-acl", "131": "NOUN|l-advcl", "132": "NOUN|l-ccomp", "133": "NOUN|l-compound", "134": "NOUN|l-csubj", "135": "NOUN|l-csubj:outer", "136": "NOUN|l-nmod", "137": "NOUN|l-nsubj", "138": "NOUN|l-nsubj:outer", "139": "NOUN|l-obj", "140": "NOUN|l-obl", "141": "NOUN|r-compound", "142": "NOUN|r-nmod", "143": "NOUN|r-nsubj", "144": "NOUN|root", "145": "NUM", "146": "NUM.", "147": "NUM|_", "148": "NUM|l-advcl", "149": "NUM|l-compound", "150": "NUM|l-nmod", "151": "NUM|l-nsubj", "152": "NUM|l-nsubj:outer", "153": "NUM|l-nummod", "154": "NUM|l-obj", "155": "NUM|l-obl", "156": "NUM|r-compound", "157": "NUM|root", "158": "PART", "159": "PART.", "160": "PART|_", "161": "PART|l-mark", "162": "PART|r-mark", "163": "PRON", "164": "PRON.", "165": "PRON|_", "166": "PRON|l-acl", "167": "PRON|l-advcl", "168": "PRON|l-nmod", "169": "PRON|l-nsubj", "170": "PRON|l-nsubj:outer", "171": "PRON|l-obj", "172": "PRON|l-obl", "173": "PRON|root", "174": "PROPN", "175": "PROPN.", "176": "PROPN|_", "177": "PROPN|l-acl", "178": "PROPN|l-advcl", "179": "PROPN|l-compound", "180": "PROPN|l-nmod", "181": "PROPN|l-nsubj", "182": "PROPN|l-nsubj:outer", "183": "PROPN|l-obj", "184": "PROPN|l-obl", "185": "PROPN|r-compound", "186": "PROPN|r-nmod", "187": "PROPN|root", "188": "PUNCT", "189": "PUNCT.", "190": "PUNCT|_", "191": "PUNCT|l-punct", "192": "PUNCT|r-punct", "193": "SCONJ", "194": "SCONJ.", "195": "SCONJ|_", "196": "SCONJ|l-dep", "197": "SCONJ|r-fixed", "198": "SCONJ|r-mark", "199": "SYM", "200": "SYM.", "201": "SYM|_", "202": "SYM|l-compound", "203": "SYM|l-dep", "204": "SYM|l-nmod", "205": "SYM|l-obl", "206": "SYM|r-compound", "207": "SYM|r-dep", "208": "VERB", "209": "VERB.", "210": "VERB|_", "211": "VERB|l-acl", "212": "VERB|l-advcl", "213": "VERB|l-ccomp", "214": "VERB|l-compound", "215": "VERB|l-csubj", "216": "VERB|l-csubj:outer", "217": "VERB|l-nmod", "218": "VERB|l-obj", "219": "VERB|l-obl", "220": "VERB|r-acl", "221": "VERB|r-advcl", "222": "VERB|r-compound", "223": "VERB|root", "224": "X", "225": "X.", "226": "X|_", "227": "X|l-nmod", "228": "X|r-dep" }, "initializer_cutoff_factor": 2.0, "initializer_range": 0.02, "intermediate_size": 1152, "label2id": { "ADJ": 0, "ADJ.": 1, "ADJ|_": 2, "ADJ|l-acl": 3, "ADJ|l-advcl": 4, "ADJ|l-amod": 5, "ADJ|l-ccomp": 6, "ADJ|l-csubj": 7, "ADJ|l-csubj:outer": 8, "ADJ|l-nmod": 9, "ADJ|l-nsubj": 10, "ADJ|l-obj": 11, "ADJ|l-obl": 12, "ADJ|r-acl": 13, "ADJ|r-amod": 14, "ADJ|r-dep": 15, "ADJ|root": 16, "ADP": 17, "ADP.": 18, "ADP|_": 19, "ADP|l-case": 20, "ADP|r-case": 21, "ADP|r-fixed": 22, "ADV": 23, "ADV.": 24, "ADV|_": 25, "ADV|l-advcl": 26, "ADV|l-advmod": 27, "ADV|l-obj": 28, "ADV|r-dep": 29, "ADV|root": 30, "AUX": 31, "AUX.": 32, "AUX|Polarity=Neg|_": 33, "AUX|Polarity=Neg|r-aux": 34, "AUX|Polarity=Neg|r-fixed": 35, "AUX|_": 36, "AUX|r-aux": 37, "AUX|r-cop": 38, "AUX|r-fixed": 39, "AUX|root": 40, "B-ADJ": 41, "B-ADJ.": 42, "B-ADP": 43, "B-ADP.": 44, "B-ADV": 45, "B-ADV.": 46, "B-AUX": 47, "B-AUX.": 48, "B-CCONJ": 49, "B-CCONJ.": 50, "B-DET": 51, "B-DET.": 52, "B-INTJ": 53, "B-INTJ.": 54, "B-NOUN": 55, "B-NOUN.": 56, "B-NUM": 57, "B-NUM.": 58, "B-PART": 59, "B-PART.": 60, "B-PRON": 61, "B-PRON.": 62, "B-PROPN": 63, "B-PROPN.": 64, "B-PUNCT": 65, "B-PUNCT.": 66, "B-SCONJ": 67, "B-SCONJ.": 68, "B-SYM": 69, "B-SYM.": 70, "B-VERB": 71, "B-VERB.": 72, "B-X": 73, "B-X.": 74, "CCONJ": 75, "CCONJ.": 76, "CCONJ|_": 77, "CCONJ|l-cc": 78, "CCONJ|r-cc": 79, "DET": 80, "DET.": 81, "DET|_": 82, "DET|l-det": 83, "I-ADJ": 84, "I-ADJ.": 85, "I-ADP": 86, "I-ADP.": 87, "I-ADV": 88, "I-ADV.": 89, "I-AUX": 90, "I-AUX.": 91, "I-CCONJ": 92, "I-CCONJ.": 93, "I-DET": 94, "I-DET.": 95, "I-INTJ": 96, "I-INTJ.": 97, "I-NOUN": 98, "I-NOUN.": 99, "I-NUM": 100, "I-NUM.": 101, "I-PART": 102, "I-PART.": 103, "I-PRON": 104, "I-PRON.": 105, "I-PROPN": 106, "I-PROPN.": 107, "I-PUNCT": 108, "I-PUNCT.": 109, "I-SCONJ": 110, "I-SCONJ.": 111, "I-SYM": 112, "I-SYM.": 113, "I-VERB": 114, "I-VERB.": 115, "I-X": 116, "I-X.": 117, "INTJ": 118, "INTJ.": 119, "INTJ|_": 120, "INTJ|l-discourse": 121, "INTJ|r-discourse": 122, "INTJ|root": 123, "NOUN": 124, "NOUN.": 125, "NOUN|Polarity=Neg|_": 126, "NOUN|Polarity=Neg|l-obl": 127, "NOUN|Polarity=Neg|root": 128, "NOUN|_": 129, "NOUN|l-acl": 130, "NOUN|l-advcl": 131, "NOUN|l-ccomp": 132, "NOUN|l-compound": 133, "NOUN|l-csubj": 134, "NOUN|l-csubj:outer": 135, "NOUN|l-nmod": 136, "NOUN|l-nsubj": 137, "NOUN|l-nsubj:outer": 138, "NOUN|l-obj": 139, "NOUN|l-obl": 140, "NOUN|r-compound": 141, "NOUN|r-nmod": 142, "NOUN|r-nsubj": 143, "NOUN|root": 144, "NUM": 145, "NUM.": 146, "NUM|_": 147, "NUM|l-advcl": 148, "NUM|l-compound": 149, "NUM|l-nmod": 150, "NUM|l-nsubj": 151, "NUM|l-nsubj:outer": 152, "NUM|l-nummod": 153, "NUM|l-obj": 154, "NUM|l-obl": 155, "NUM|r-compound": 156, "NUM|root": 157, "PART": 158, "PART.": 159, "PART|_": 160, "PART|l-mark": 161, "PART|r-mark": 162, "PRON": 163, "PRON.": 164, "PRON|_": 165, "PRON|l-acl": 166, "PRON|l-advcl": 167, "PRON|l-nmod": 168, "PRON|l-nsubj": 169, "PRON|l-nsubj:outer": 170, "PRON|l-obj": 171, "PRON|l-obl": 172, "PRON|root": 173, "PROPN": 174, "PROPN.": 175, "PROPN|_": 176, "PROPN|l-acl": 177, "PROPN|l-advcl": 178, "PROPN|l-compound": 179, "PROPN|l-nmod": 180, "PROPN|l-nsubj": 181, "PROPN|l-nsubj:outer": 182, "PROPN|l-obj": 183, "PROPN|l-obl": 184, "PROPN|r-compound": 185, "PROPN|r-nmod": 186, "PROPN|root": 187, "PUNCT": 188, "PUNCT.": 189, "PUNCT|_": 190, "PUNCT|l-punct": 191, "PUNCT|r-punct": 192, "SCONJ": 193, "SCONJ.": 194, "SCONJ|_": 195, "SCONJ|l-dep": 196, "SCONJ|r-fixed": 197, "SCONJ|r-mark": 198, "SYM": 199, "SYM.": 200, "SYM|_": 201, "SYM|l-compound": 202, "SYM|l-dep": 203, "SYM|l-nmod": 204, "SYM|l-obl": 205, "SYM|r-compound": 206, "SYM|r-dep": 207, "VERB": 208, "VERB.": 209, "VERB|_": 210, "VERB|l-acl": 211, "VERB|l-advcl": 212, "VERB|l-ccomp": 213, "VERB|l-compound": 214, "VERB|l-csubj": 215, "VERB|l-csubj:outer": 216, "VERB|l-nmod": 217, "VERB|l-obj": 218, "VERB|l-obl": 219, "VERB|r-acl": 220, "VERB|r-advcl": 221, "VERB|r-compound": 222, "VERB|root": 223, "X": 224, "X.": 225, "X|_": 226, "X|l-nmod": 227, "X|r-dep": 228 }, "layer_norm_eps": 1e-05, "local_attention": 128, "local_rope_theta": 10000.0, "max_position_embeddings": 8192, "mlp_bias": false, "mlp_dropout": 0.0, "model_type": "modernbert", "norm_bias": false, "norm_eps": 1e-05, "num_attention_heads": 12, "num_hidden_layers": 22, "pad_token_id": 1, "position_embedding_type": "absolute", "reference_compile": true, "repad_logits_with_grad": false, "sep_token_id": 2, "sparse_pred_ignore_index": -100, "sparse_prediction": false, "tokenizer_class": "DebertaV2TokenizerFast", "torch_dtype": "float32", "transformers_version": "4.47.1", "vocab_size": 65000 }