{
  "_name_or_path": "distilbert-base-cased",
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "Bloom-7B",
    "1": "Claude-Instant-v1",
    "2": "Claude-v1",
    "3": "Cohere-Command",
    "4": "Dolphin-2.5-Mixtral-8x7B",
    "5": "Dolphin-Mixtral-8x7B",
    "6": "Falcon-180B",
    "7": "Flan-T5-Base",
    "8": "Flan-T5-Large",
    "9": "Flan-T5-Small",
    "10": "Flan-T5-XL",
    "11": "Flan-T5-XXL",
    "12": "GLM-130B",
    "13": "GPT-3.5",
    "14": "GPT-4",
    "15": "GPT-J",
    "16": "GPT-NeoX",
    "17": "Gemini-Pro",
    "18": "Goliath-120B",
    "19": "Human",
    "20": "LLaMA-13B",
    "21": "LLaMA-2-70B",
    "22": "LLaMA-2-7B",
    "23": "LLaMA-30B",
    "24": "LLaMA-65B",
    "25": "LLaMA-7B",
    "26": "LZLV-70B",
    "27": "Mistral-7B",
    "28": "Mistral-7B-OpenOrca",
    "29": "Mixtral-8x7B",
    "30": "MythoMax-L2-13B",
    "31": "Neural-Chat-7B",
    "32": "Noromaid-20B",
    "33": "Nous-Capybara-34B",
    "34": "Nous-Capybara-7B",
    "35": "Nous-Hermes-LLaMA-2-13B",
    "36": "Nous-Hermes-LLaMA-2-70B",
    "37": "OPT-1.3B",
    "38": "OPT-125M",
    "39": "OPT-13B",
    "40": "OPT-2.7B",
    "41": "OPT-30B",
    "42": "OPT-350M",
    "43": "OPT-6.7B",
    "44": "OpenChat-3.5",
    "45": "OpenHermes-2-Mistral-7B",
    "46": "OpenHermes-2.5-Mistral-7B",
    "47": "PaLM-2",
    "48": "Psyfighter-13B",
    "49": "Psyfighter-2-13B",
    "50": "RWKV-5-World-3B",
    "51": "StripedHyena-Nous-7B",
    "52": "T0-11B",
    "53": "T0-3B",
    "54": "Text-Ada-001",
    "55": "Text-Babbage-001",
    "56": "Text-Curie-001",
    "57": "Text-Davinci-001",
    "58": "Text-Davinci-002",
    "59": "Text-Davinci-003",
    "60": "Toppy-M-7B",
    "61": "Unknown",
    "62": "YI-34B"
  },
  "initializer_range": 0.02,
  "label2id": {
    "Bloom-7B": 0,
    "Claude-Instant-v1": 1,
    "Claude-v1": 2,
    "Cohere-Command": 3,
    "Dolphin-2.5-Mixtral-8x7B": 4,
    "Dolphin-Mixtral-8x7B": 5,
    "Falcon-180B": 6,
    "Flan-T5-Base": 7,
    "Flan-T5-Large": 8,
    "Flan-T5-Small": 9,
    "Flan-T5-XL": 10,
    "Flan-T5-XXL": 11,
    "GLM-130B": 12,
    "GPT-3.5": 13,
    "GPT-4": 14,
    "GPT-J": 15,
    "GPT-NeoX": 16,
    "Gemini-Pro": 17,
    "Goliath-120B": 18,
    "Human": 19,
    "LLaMA-13B": 20,
    "LLaMA-2-70B": 21,
    "LLaMA-2-7B": 22,
    "LLaMA-30B": 23,
    "LLaMA-65B": 24,
    "LLaMA-7B": 25,
    "LZLV-70B": 26,
    "Mistral-7B": 27,
    "Mistral-7B-OpenOrca": 28,
    "Mixtral-8x7B": 29,
    "MythoMax-L2-13B": 30,
    "Neural-Chat-7B": 31,
    "Noromaid-20B": 32,
    "Nous-Capybara-34B": 33,
    "Nous-Capybara-7B": 34,
    "Nous-Hermes-LLaMA-2-13B": 35,
    "Nous-Hermes-LLaMA-2-70B": 36,
    "OPT-1.3B": 37,
    "OPT-125M": 38,
    "OPT-13B": 39,
    "OPT-2.7B": 40,
    "OPT-30B": 41,
    "OPT-350M": 42,
    "OPT-6.7B": 43,
    "OpenChat-3.5": 44,
    "OpenHermes-2-Mistral-7B": 45,
    "OpenHermes-2.5-Mistral-7B": 46,
    "PaLM-2": 47,
    "Psyfighter-13B": 48,
    "Psyfighter-2-13B": 49,
    "RWKV-5-World-3B": 50,
    "StripedHyena-Nous-7B": 51,
    "T0-11B": 52,
    "T0-3B": 53,
    "Text-Ada-001": 54,
    "Text-Babbage-001": 55,
    "Text-Curie-001": 56,
    "Text-Davinci-001": 57,
    "Text-Davinci-002": 58,
    "Text-Davinci-003": 59,
    "Toppy-M-7B": 60,
    "Unknown": 61,
    "YI-34B": 62
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "output_past": true,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "torch_dtype": "float32",
  "transformers_version": "4.36.2",
  "vocab_size": 28996
}