sharpenb committed on
Commit
d33ca97
1 Parent(s): 0ef9a4e

Update README.md

Browse files
Files changed (1) hide show
  1. config.json +0 -143
config.json CHANGED
@@ -1,143 +0,0 @@
1
- {
2
- "_name_or_path": "microsoft/Phi-3-small-8k-instruct",
3
- "architectures": [
4
- "Phi3SmallForCausalLM"
5
- ],
6
- "attention_bias": false,
7
- "attention_dropout_prob": 0.0,
8
- "auto_map": {
9
- "AutoConfig": "microsoft/Phi-3-small-8k-instruct--configuration_phi3_small.Phi3SmallConfig",
10
- "AutoModelForCausalLM": "microsoft/Phi-3-small-8k-instruct--modeling_phi3_small.Phi3SmallForCausalLM",
11
- "AutoModelForSequenceClassification": "microsoft/Phi-3-small-8k-instruct--modeling_phi3_small.Phi3SmallForSequenceClassification",
12
- "AutoTokenizer": "microsoft/Phi-3-small-8k-instruct--tokenization_phi3_small.Phi3SmallTokenizer"
13
- },
14
- "blocksparse_block_size": 64,
15
- "blocksparse_homo_head_pattern": false,
16
- "blocksparse_num_local_blocks": 16,
17
- "blocksparse_triton_kernel_block_size": 64,
18
- "blocksparse_vert_stride": 8,
19
- "bos_token_id": 100257,
20
- "dense_attention_every_n_layers": 2,
21
- "dummy_token_indices": [
22
- 100256,
23
- 100258,
24
- 100259,
25
- 100260,
26
- 100264,
27
- 100265,
28
- 100267,
29
- 100268,
30
- 100269,
31
- 100270,
32
- 100271,
33
- 100272,
34
- 100273,
35
- 100274,
36
- 100275,
37
- 100276,
38
- 100277,
39
- 100278,
40
- 100279,
41
- 100280,
42
- 100281,
43
- 100282,
44
- 100283,
45
- 100284,
46
- 100285,
47
- 100286,
48
- 100287,
49
- 100288,
50
- 100289,
51
- 100290,
52
- 100291,
53
- 100292,
54
- 100293,
55
- 100294,
56
- 100295,
57
- 100296,
58
- 100297,
59
- 100298,
60
- 100299,
61
- 100300,
62
- 100301,
63
- 100302,
64
- 100303,
65
- 100304,
66
- 100305,
67
- 100306,
68
- 100307,
69
- 100308,
70
- 100309,
71
- 100310,
72
- 100311,
73
- 100312,
74
- 100313,
75
- 100314,
76
- 100315,
77
- 100316,
78
- 100317,
79
- 100318,
80
- 100319,
81
- 100320,
82
- 100321,
83
- 100322,
84
- 100323,
85
- 100324,
86
- 100325,
87
- 100326,
88
- 100327,
89
- 100328,
90
- 100329,
91
- 100330,
92
- 100331,
93
- 100332,
94
- 100333,
95
- 100334,
96
- 100335,
97
- 100336,
98
- 100337,
99
- 100338,
100
- 100339,
101
- 100340,
102
- 100341,
103
- 100342,
104
- 100343,
105
- 100344,
106
- 100345,
107
- 100346,
108
- 100347,
109
- 100348,
110
- 100349,
111
- 100350,
112
- 100351
113
- ],
114
- "embedding_dropout_prob": 0.1,
115
- "eos_token_id": 100257,
116
- "ff_dim_multiplier": null,
117
- "ff_intermediate_size": 14336,
118
- "ffn_dropout_prob": 0.1,
119
- "gegelu_limit": 20.0,
120
- "gegelu_pad_to_256": true,
121
- "hidden_act": "gegelu",
122
- "hidden_size": 4096,
123
- "initializer_range": 0.02,
124
- "layer_norm_epsilon": 1e-05,
125
- "max_position_embeddings": 8192,
126
- "model_type": "phi3small",
127
- "mup_attn_multiplier": 1.0,
128
- "mup_embedding_multiplier": 10.0,
129
- "mup_use_scaling": true,
130
- "mup_width_multiplier": 8.0,
131
- "num_attention_heads": 32,
132
- "num_hidden_layers": 32,
133
- "num_key_value_heads": 8,
134
- "pad_sequence_to_multiple_of_64": true,
135
- "reorder_and_upcast_attn": false,
136
- "rope_embedding_base": 1000000,
137
- "rope_position_scale": 1.0,
138
- "rope_scaling": null,
139
- "torch_dtype": "bfloat16",
140
- "transformers_version": "4.41.2",
141
- "use_cache": true,
142
- "vocab_size": 100352
143
- }