{
  "architectures": [
    "LSWTForCausalLM"
  ],
  "bos_token_id": 2,
  "d_ffn": 4096,
  "d_model": 1536,
  "d_vocab": 768,
  "dropout_att_mat": 0.0,
  "dropout_att_out": 0.0,
  "dropout_ffn_int": 0.0,
  "dropout_ffn_out": 0.0,
  "dropout_layers": 0.0,
  "enable_bias": true,
  "eos_token_id": 2,
  "gated_att": false,
  "gated_ffn": true,
  "init_std": 0.02,
  "model_type": "lsw_transformer",
  "n_heads": 24,
  "n_layers": 18,
  "n_registers": 0,
  "pad_token_id": 1,
  "parent_embeddings": "facebook/opt-125m",
  "qk_norm": false,
  "recompute_kv": true,
  "rope_base_freq": 500000,
  "rope_dynamic": false,
  "rope_ntk_scale": 1.0,
  "rope_positions": 4096,
  "rope_reversed": true,
  "rope_yarn_a": 0.07,
  "rope_yarn_b": 1.0,
  "torch_dtype": "float16",
  "trainable_embeddings": true,
  "transformers_version": "4.37.2",
  "use_cache": true,
  "vocab_size": 50272
}
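For reference, a minimal loading sketch. Since `model_type` is `lsw_transformer`, a custom architecture not built into the transformers library, this assumes the hosting repo ships the LSWT modeling code and therefore requires `trust_remote_code=True`; `<repo_id>` is a hypothetical placeholder for the actual model repository, and the tokenizer choice is an assumption inferred from `parent_embeddings` and the OPT-sized `vocab_size` of 50272.

```python
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

repo_id = "<repo_id>"  # hypothetical placeholder for the actual model repo

# Custom model_type "lsw_transformer" lives in the repo's remote code,
# so trust_remote_code=True is needed to resolve LSWTForCausalLM.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
print(config.model_type)  # "lsw_transformer"
print(config.vocab_size)  # 50272

model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    trust_remote_code=True,
    torch_dtype="auto",  # honors the "float16" dtype recorded in the config
)

# Assumption: parent_embeddings points at facebook/opt-125m, whose tokenizer
# matches vocab_size 50272; confirm against the actual repo before relying on this.
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
```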
|
|