{ "model_type": "mistral", "architectures": ["MistralForCausalLM"], "hidden_size": 4096, "intermediate_size": 11008, // Size of the feedforward layer "num_attention_heads": 32, "num_hidden_layers": 32, "initializer_range": 0.02, "max_position_embeddings": 2048, "use_cache": true, "pad_token_id": 0, "bos_token_id": 1, "eos_token_id": 2, "tie_word_embeddings": false, "attention_probs_dropout_prob": 0.1, // Dropout for attention layers "hidden_dropout_prob": 0.1, // Dropout for fully connected layers "layer_norm_eps": 1e-5, // Epsilon for layer normalization "transformers_version": "4.33.0", // Specify the Transformers version "quantization_config": { "load_in_4bit": true, "bnb_4bit_compute_dtype": "bfloat16", "bnb_4bit_quant_type": "nf4", "bnb_4bit_use_double_quant": true } }