{
  "activation": "silu",
  "attn_type": "mha",
  "bias": false,
  "d_model": 1024,
  "dropout": 0.2,
  "head_dim": null,
  "hidden_dim": 1536,
  "kv_lora_rank": null,
  "mlp": "GLU",
  "num_heads": 16,
  "num_kv_heads": 16,
  "num_layers": 4,
  "q_lora_rank": null,
  "rope_head_dim": null,
  "seq_len": 256,
  "vocab_size": 50257,
  "weight_tying": false
}
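For illustration, a minimal sketch of loading this config in Python, assuming it lives in a file such as `config.json`; the `ModelConfig` class name and file path are assumptions for this example, not names from the source repo. It also shows the common convention of deriving the head dimension when `head_dim` is `null`.

```python
import json
from dataclasses import dataclass
from typing import Optional

# Hypothetical dataclass mirroring the fields in the JSON above.
@dataclass
class ModelConfig:
    activation: str
    attn_type: str
    bias: bool
    d_model: int
    dropout: float
    head_dim: Optional[int]
    hidden_dim: int
    kv_lora_rank: Optional[int]   # only relevant for latent-attention variants; null here
    mlp: str
    num_heads: int
    num_kv_heads: int             # equals num_heads, i.e. standard MHA rather than GQA
    num_layers: int
    q_lora_rank: Optional[int]
    rope_head_dim: Optional[int]
    seq_len: int
    vocab_size: int
    weight_tying: bool

    @property
    def resolved_head_dim(self) -> int:
        # When head_dim is null, the usual convention is d_model // num_heads
        # (1024 // 16 = 64 for the values above).
        if self.head_dim is not None:
            return self.head_dim
        return self.d_model // self.num_heads


with open("config.json") as f:
    cfg = ModelConfig(**json.load(f))

print(cfg.resolved_head_dim)  # 64
```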