|
{ |
|
"_name_or_path": "sberbank-ai/rugpt3small_based_on_gpt2", |
|
"activation_function": "gelu_new", |
|
"architectures": [ |
|
"GPT2ForSequenceClassification" |
|
], |
|
"attn_pdrop": 0.1, |
|
"bos_token_id": 1, |
|
"embd_pdrop": 0.1, |
|
"eos_token_id": 2, |
|
"gradient_checkpointing": false, |
|
"id2label": { |
|
"0": "LABEL_0", |
|
"1": "LABEL_1", |
|
"2": "LABEL_2", |
|
"3": "LABEL_3", |
|
"4": "LABEL_4", |
|
"5": "LABEL_5", |
|
"6": "LABEL_6", |
|
"7": "LABEL_7", |
|
"8": "LABEL_8", |
|
"9": "LABEL_9", |
|
"10": "LABEL_10", |
|
"11": "LABEL_11", |
|
"12": "LABEL_12", |
|
"13": "LABEL_13", |
|
"14": "LABEL_14", |
|
"15": "LABEL_15", |
|
"16": "LABEL_16", |
|
"17": "LABEL_17", |
|
"18": "LABEL_18", |
|
"19": "LABEL_19", |
|
"20": "LABEL_20", |
|
"21": "LABEL_21", |
|
"22": "LABEL_22", |
|
"23": "LABEL_23", |
|
"24": "LABEL_24", |
|
"25": "LABEL_25", |
|
"26": "LABEL_26", |
|
"27": "LABEL_27", |
|
"28": "LABEL_28", |
|
"29": "LABEL_29", |
|
"30": "LABEL_30", |
|
"31": "LABEL_31", |
|
"32": "LABEL_32", |
|
"33": "LABEL_33", |
|
"34": "LABEL_34", |
|
"35": "LABEL_35", |
|
"36": "LABEL_36", |
|
"37": "LABEL_37", |
|
"38": "LABEL_38", |
|
"39": "LABEL_39", |
|
"40": "LABEL_40", |
|
"41": "LABEL_41", |
|
"42": "LABEL_42", |
|
"43": "LABEL_43", |
|
"44": "LABEL_44", |
|
"45": "LABEL_45", |
|
"46": "LABEL_46", |
|
"47": "LABEL_47", |
|
"48": "LABEL_48", |
|
"49": "LABEL_49" |
|
}, |
|
"initializer_range": 0.02, |
|
"label2id": { |
|
"LABEL_0": 0, |
|
"LABEL_1": 1, |
|
"LABEL_10": 10, |
|
"LABEL_11": 11, |
|
"LABEL_12": 12, |
|
"LABEL_13": 13, |
|
"LABEL_14": 14, |
|
"LABEL_15": 15, |
|
"LABEL_16": 16, |
|
"LABEL_17": 17, |
|
"LABEL_18": 18, |
|
"LABEL_19": 19, |
|
"LABEL_2": 2, |
|
"LABEL_20": 20, |
|
"LABEL_21": 21, |
|
"LABEL_22": 22, |
|
"LABEL_23": 23, |
|
"LABEL_24": 24, |
|
"LABEL_25": 25, |
|
"LABEL_26": 26, |
|
"LABEL_27": 27, |
|
"LABEL_28": 28, |
|
"LABEL_29": 29, |
|
"LABEL_3": 3, |
|
"LABEL_30": 30, |
|
"LABEL_31": 31, |
|
"LABEL_32": 32, |
|
"LABEL_33": 33, |
|
"LABEL_34": 34, |
|
"LABEL_35": 35, |
|
"LABEL_36": 36, |
|
"LABEL_37": 37, |
|
"LABEL_38": 38, |
|
"LABEL_39": 39, |
|
"LABEL_4": 4, |
|
"LABEL_40": 40, |
|
"LABEL_41": 41, |
|
"LABEL_42": 42, |
|
"LABEL_43": 43, |
|
"LABEL_44": 44, |
|
"LABEL_45": 45, |
|
"LABEL_46": 46, |
|
"LABEL_47": 47, |
|
"LABEL_48": 48, |
|
"LABEL_49": 49, |
|
"LABEL_5": 5, |
|
"LABEL_6": 6, |
|
"LABEL_7": 7, |
|
"LABEL_8": 8, |
|
"LABEL_9": 9 |
|
}, |
|
"layer_norm_epsilon": 1e-05, |
|
"model_type": "gpt2", |
|
"n_ctx": 2048, |
|
"n_embd": 768, |
|
"n_head": 12, |
|
"n_inner": null, |
|
"n_layer": 12, |
|
"n_positions": 2048, |
|
"pad_token_id": 0, |
|
"problem_type": "single_label_classification", |
|
"reorder_and_upcast_attn": false, |
|
"resid_pdrop": 0.1, |
|
"scale_attn_by_inverse_layer_idx": false, |
|
"scale_attn_weights": true, |
|
"summary_activation": null, |
|
"summary_first_dropout": 0.1, |
|
"summary_proj_to_labels": true, |
|
"summary_type": "cls_index", |
|
"summary_use_proj": true, |
|
"torch_dtype": "float32", |
|
"transformers_version": "4.46.3", |
|
"use_cache": true, |
|
"vocab_size": 50264 |
|
} |
|
|