{ "_name_or_path": "answerdotai/ModernBERT-base", "architectures": [ "ModernBertForSequenceClassification" ], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 50281, "classifier_activation": "gelu", "classifier_bias": false, "classifier_dropout": 0.0, "classifier_pooling": "mean", "classifiers_size": [ 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 2, 2, 3, 2, 2, 2, 2, 2, 6, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 3, 2, 4, 3, 3, 2, 2, 2, 2, 2, 3, 2, 3, 2, 3, 3, 3, 1, 2, 2, 3, 13, 2, 3, 2, 2, 3, 3, 2, 3, 3, 2, 3, 2, 2, 2, 2, 3, 4, 3, 3, 2, 2, 3, 3, 2, 2, 2, 2, 4, 3, 2, 2, 3 ], "cls_token_id": 50281, "decoder_bias": true, "deterministic_flash_attn": false, "embedding_dropout": 0.0, "eos_token_id": 50282, "global_attn_every_n_layers": 3, "global_rope_theta": 160000.0, "gradient_checkpointing": false, "hidden_activation": "gelu", "hidden_size": 768, "id2label": { "0": "entailment", "1": "neutral", "2": "contradiction" }, "initializer_cutoff_factor": 2.0, "initializer_range": 0.02, "intermediate_size": 1152, "label2id": { "contradiction": 2, "entailment": 0, "neutral": 1 }, "layer_norm_eps": 1e-05, "local_attention": 128, "local_rope_theta": 10000.0, "max_position_embeddings": 2048, "mlp_bias": false, "mlp_dropout": 0.0, "model_type": "modernbert", "norm_bias": false, "norm_eps": 1e-05, "num_attention_heads": 12, "num_hidden_layers": 22, "pad_token_id": 50283, "position_embedding_type": "absolute", "problem_type": "single_label_classification", "reference_compile": true, "sep_token_id": 50282, "sparse_pred_ignore_index": -100, "sparse_prediction": false, "tasks": [ "glue/mnli", "glue/qnli", "glue/rte", "glue/wnli", "super_glue/cb", "anli/a1", "anli/a2", "anli/a3", "sick/label", "sick/entailment_AB", "snli", "scitail/snli_format", "hans", "WANLI", "recast/recast_sentiment", "recast/recast_verbcorner", "recast/recast_ner", "recast/recast_factuality", "recast/recast_puns", "recast/recast_kg_relations", "recast/recast_verbnet", "recast/recast_megaveridicality", "probability_words_nli/usnli", "probability_words_nli/reasoning_1hop", "probability_words_nli/reasoning_2hop", "nan-nli", "nli_fever", "breaking_nli", "conj_nli", "fracas", "dialogue_nli", "mpe", "dnc", "recast_white/fnplus", "recast_white/sprl", "recast_white/dpr", "robust_nli/IS_CS", "robust_nli/LI_LI", "robust_nli/ST_WO", "robust_nli/PI_SP", "robust_nli/PI_CD", "robust_nli/ST_SE", "robust_nli/ST_NE", "robust_nli/ST_LM", "robust_nli_is_sd", "robust_nli_li_ts", "add_one_rte", "cycic_classification", "lingnli", "monotonicity-entailment", "scinli", "naturallogic", "syntactic-augmentation-nli", "autotnli", "defeasible-nli/atomic", "defeasible-nli/snli", "help-nli", "nli-veridicality-transitivity", "lonli", "dadc-limit-nli", "folio", "tomi-nli", "temporal-nli", "counterfactually-augmented-snli", "cnli", "chaos-mnli-ambiguity", "logiqa-2.0-nli", "mindgames", "ConTRoL-nli", "logical-fallacy", "conceptrules_v2", "zero-shot-label-nli", "scone", "monli", "SpaceNLI", "propsegment/nli", "SDOH-NLI", "scifact_entailment", "AdjectiveScaleProbe-nli", "resnli", "semantic_fragments_nli", "dataset_train_nli", "ruletaker", "PARARULE-Plus", "logical-entailment", "nope", "LogicNLI", "contract-nli/contractnli_a/seg", "contract-nli/contractnli_b/full", "nli4ct_semeval2024", "biosift-nli", "SIGA-nli", "FOL-nli", "doc-nli", "mctest-nli", "idioms-nli", "lifecycle-entailment", "MSciNLI", "hover-3way/nli", "seahorse_summarization_evaluation", "babi_nli", "gen_debiased_nli" ], "torch_dtype": "float32", "transformers_version": "4.48.0.dev0", "vocab_size": 50368 }