Text Generation
Transformers
Safetensors
openelm
custom_code
OpenELM-270M-Instruct / config.json
qicao-apple's picture
add OpenELM-270M-Instruct
c401df2
{
"activation_fn_name": "swish",
"architectures": [
"OpenELMForCausalLM"
],
"auto_map": {
"AutoConfig": "configuration_openelm.OpenELMConfig",
"AutoModelForCausalLM": "modeling_openelm.OpenELMForCausalLM"
},
"bos_token_id": 1,
"eos_token_id": 2,
"ffn_dim_divisor": 256,
"ffn_multipliers": [
0.5,
0.73,
0.97,
1.2,
1.43,
1.67,
1.9,
2.13,
2.37,
2.6,
2.83,
3.07,
3.3,
3.53,
3.77,
4.0
],
"ffn_with_glu": true,
"head_dim": 64,
"initializer_range": 0.02,
"max_context_length": 2048,
"model_dim": 1280,
"model_type": "openelm",
"normalization_layer_name": "rms_norm",
"normalize_qk_projections": true,
"num_gqa_groups": 4,
"num_kv_heads": [
3,
3,
3,
3,
3,
4,
4,
4,
4,
4,
4,
4,
5,
5,
5,
5
],
"num_query_heads": [
12,
12,
12,
12,
12,
16,
16,
16,
16,
16,
16,
16,
20,
20,
20,
20
],
"num_transformer_layers": 16,
"qkv_multipliers": [
0.5,
1.0
],
"rope_freq_constant": 10000,
"rope_max_length": 4096,
"share_input_output_layers": true,
"torch_dtype": "bfloat16",
"transformers_version": "4.39.3",
"use_cache": true,
"vocab_size": 32000
}