mayank-mishra's picture
downcast to bf16
c33ce4f
raw
history blame
1.07 kB
{
"activation_function": "swiglu",
"add_bias": true,
"apply_residual_connection_post_layernorm": false,
"architectures": [
"GraniteForCausalLM"
],
"attention_head_type": "gqa",
"attention_multiplier": null,
"attention_softmax_in_fp32": true,
"attn_pdrop": 0.1,
"auto_map": {
"AutoConfig": "configuration_granite.GraniteConfig",
"AutoModel": "modeling_granite.GraniteModel",
"AutoModelForCausalLM": "modeling_granite.GraniteForCausalLM"
},
"bos_token_id": 0,
"embd_pdrop": 0.1,
"eos_token_id": 0,
"initializer_range": 0.02,
"layer_norm_epsilon": 1e-05,
"model_type": "granite",
"n_embd": 4096,
"n_head": 32,
"n_inner": 14336,
"n_layer": 36,
"n_positions": 4096,
"normalization_function": "rmsnorm",
"num_key_value_heads": 8,
"pad_token_id": 0,
"position_embedding_type": "rope",
"resid_pdrop": 0.1,
"rope_theta": 10000,
"scale_attention_softmax_in_fp32": true,
"scale_attn_weights": true,
"torch_dtype": "bfloat16",
"transformers_version": "4.38.1",
"use_cache": true,
"vocab_size": 49152
}