{
  "_attn_implementation_autoset": true,
  "_name_or_path": "migtissera/Tess-v2.5-Phi-3-medium-128k-14B",
  "architectures": [
    "Phi3ForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "microsoft/Phi-3-medium-128k-instruct--configuration_phi3.Phi3Config",
    "AutoModelForCausalLM": "microsoft/Phi-3-medium-128k-instruct--modeling_phi3.Phi3ForCausalLM"
  },
  "bos_token_id": 1,
  "embd_pdrop": 0.0,
  "eos_token_id": 32011,
  "hidden_act": "silu",
  "hidden_size": 5120,
  "initializer_range": 0.02,
  "intermediate_size": 17920,
  "max_position_embeddings": 131072,
  "model_type": "phi3",
  "num_attention_heads": 40,
  "num_hidden_layers": 40,
  "num_key_value_heads": 10,
  "original_max_position_embeddings": 4096,
  "pad_token_id": null,
  "resid_pdrop": 0.0,
  "rms_norm_eps": 1e-05,
  "rope_scaling": {
    "long_factor": [
      1.0,
      1.0,
      1.0,
      1.0,
      1.0,
      1.0,
      1.0,
      1.0,
      1.0,
      1.0,
      1.0,
      1.0,
      1.0,
      1.25,
      1.25,
      1.5,
      2.0,
      2.75,
      5.75,
      5.75,
      6.5,
      9.25,
      11.0,
      13.25,
      19.25,
      19.75,
      19.75,
      21.25,
      21.5,
      26.5,
      30.0,
      33.75,
      35.25,
      38.5,
      42.0,
      42.25,
      46.0,
      47.0,
      50.0,
      50.5,
      51.0,
      52.0,
      52.75,
      53.75,
      54.75,
      57.0,
      57.25,
      58.5,
      59.25,
      59.5,
      62.0,
      62.5,
      62.75,
      63.25,
      63.25,
      63.25,
      63.75,
      64.0,
      64.0,
      64.25,
      64.5,
      64.5,
      65.0,
      65.0
    ],
    "short_factor": [
      1.0,
      1.0,
      1.0,
      1.0,
      1.0,
      1.0,
      1.01,
      1.02,
      1.02,
      1.04,
      1.04,
      1.07,
      1.07,
      1.1,
      1.3000000000000003,
      1.3000000000000003,
      1.5000000000000004,
      1.5700000000000005,
      1.9000000000000008,
      2.3100000000000014,
      2.759999999999992,
      3.3899999999999784,
      3.9399999999999666,
      4.009999999999965,
      4.289999999999959,
      4.349999999999958,
      5.349999999999937,
      6.659999999999909,
      7.029999999999901,
      7.51999999999989,
      8.00999999999988,
      8.249999999999876,
      8.279999999999875,
      9.629999999999846,
      9.89999999999984,
      10.589999999999826,
      11.049999999999816,
      11.7899999999998,
      12.189999999999792,
      12.889999999999777,
      13.129999999999772,
      13.16999999999977,
      13.20999999999977,
      13.479999999999764,
      13.539999999999763,
      13.779999999999758,
      13.929999999999755,
      14.429999999999744,
      14.759999999999737,
      15.149999999999729,
      15.419999999999723,
      15.53999999999972,
      15.659999999999718,
      15.749999999999716,
      15.759999999999716,
      15.799999999999715,
      16.05999999999971,
      16.079999999999714,
      16.11999999999972,
      16.11999999999972,
      16.18999999999973,
      16.31999999999975,
      16.539999999999786,
      16.799999999999827
    ],
    "type": "su"
  },
  "rope_theta": 10000.0,
  "sliding_window": 131072,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.46.0",
  "use_cache": false,
  "vocab_size": 32064
}