mgoin committed
Commit f20cb14
1 parent: 3d65bb5

Updated compression_config to quantization_config
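
The quantization metadata itself is unchanged; it only moves from the legacy "compression_config" key to "quantization_config", the key that current transformers and vLLM releases look up for compressed-tensors checkpoints. A minimal sketch of the equivalent local edit, assuming a downloaded copy of this repo's config.json:

import json

# Load the config that still carries the legacy "compression_config" key.
with open("config.json") as f:
    config = json.load(f)

# Rename the key; the nested quantization metadata is copied verbatim,
# which is exactly what this commit's diff does.
config["quantization_config"] = config.pop("compression_config")

with open("config.json", "w") as f:
    json.dump(config, f, indent=2)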

Files changed (1):
  1. config.json +75 -75
config.json CHANGED
@@ -10,80 +10,6 @@
     "AutoModelForCausalLM": "modeling_phimoe.PhiMoEForCausalLM"
   },
   "bos_token_id": 1,
-  "compression_config": {
-    "config_groups": {
-      "group_0": {
-        "input_activations": {
-          "actorder": null,
-          "block_structure": null,
-          "dynamic": false,
-          "group_size": null,
-          "num_bits": 8,
-          "observer": "minmax",
-          "observer_kwargs": {},
-          "strategy": "tensor",
-          "symmetric": true,
-          "type": "float"
-        },
-        "output_activations": null,
-        "targets": [
-          "Linear"
-        ],
-        "weights": {
-          "actorder": null,
-          "block_structure": null,
-          "dynamic": false,
-          "group_size": null,
-          "num_bits": 8,
-          "observer": "minmax",
-          "observer_kwargs": {},
-          "strategy": "tensor",
-          "symmetric": true,
-          "type": "float"
-        }
-      }
-    },
-    "format": "float-quantized",
-    "global_compression_ratio": 1.56406776155077,
-    "ignore": [
-      "model.layers.0.block_sparse_moe.gate",
-      "model.layers.1.block_sparse_moe.gate",
-      "model.layers.2.block_sparse_moe.gate",
-      "model.layers.3.block_sparse_moe.gate",
-      "model.layers.4.block_sparse_moe.gate",
-      "model.layers.5.block_sparse_moe.gate",
-      "model.layers.6.block_sparse_moe.gate",
-      "model.layers.7.block_sparse_moe.gate",
-      "model.layers.8.block_sparse_moe.gate",
-      "model.layers.9.block_sparse_moe.gate",
-      "model.layers.10.block_sparse_moe.gate",
-      "model.layers.11.block_sparse_moe.gate",
-      "model.layers.12.block_sparse_moe.gate",
-      "model.layers.13.block_sparse_moe.gate",
-      "model.layers.14.block_sparse_moe.gate",
-      "model.layers.15.block_sparse_moe.gate",
-      "model.layers.16.block_sparse_moe.gate",
-      "model.layers.17.block_sparse_moe.gate",
-      "model.layers.18.block_sparse_moe.gate",
-      "model.layers.19.block_sparse_moe.gate",
-      "model.layers.20.block_sparse_moe.gate",
-      "model.layers.21.block_sparse_moe.gate",
-      "model.layers.22.block_sparse_moe.gate",
-      "model.layers.23.block_sparse_moe.gate",
-      "model.layers.24.block_sparse_moe.gate",
-      "model.layers.25.block_sparse_moe.gate",
-      "model.layers.26.block_sparse_moe.gate",
-      "model.layers.27.block_sparse_moe.gate",
-      "model.layers.28.block_sparse_moe.gate",
-      "model.layers.29.block_sparse_moe.gate",
-      "model.layers.30.block_sparse_moe.gate",
-      "model.layers.31.block_sparse_moe.gate",
-      "lm_head"
-    ],
-    "kv_cache_scheme": null,
-    "quant_method": "compressed-tensors",
-    "quantization_status": "compressed"
-  },
   "eos_token_id": 32000,
   "hidden_act": "silu",
   "hidden_dropout": 0.0,
@@ -248,5 +174,79 @@
   "torch_dtype": "bfloat16",
   "transformers_version": "4.44.2",
   "use_cache": true,
-  "vocab_size": 32064
+  "vocab_size": 32064,
+  "quantization_config": {
+    "config_groups": {
+      "group_0": {
+        "input_activations": {
+          "actorder": null,
+          "block_structure": null,
+          "dynamic": false,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": "minmax",
+          "observer_kwargs": {},
+          "strategy": "tensor",
+          "symmetric": true,
+          "type": "float"
+        },
+        "output_activations": null,
+        "targets": [
+          "Linear"
+        ],
+        "weights": {
+          "actorder": null,
+          "block_structure": null,
+          "dynamic": false,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": "minmax",
+          "observer_kwargs": {},
+          "strategy": "tensor",
+          "symmetric": true,
+          "type": "float"
+        }
+      }
+    },
+    "format": "float-quantized",
+    "global_compression_ratio": 1.56406776155077,
+    "ignore": [
+      "model.layers.0.block_sparse_moe.gate",
+      "model.layers.1.block_sparse_moe.gate",
+      "model.layers.2.block_sparse_moe.gate",
+      "model.layers.3.block_sparse_moe.gate",
+      "model.layers.4.block_sparse_moe.gate",
+      "model.layers.5.block_sparse_moe.gate",
+      "model.layers.6.block_sparse_moe.gate",
+      "model.layers.7.block_sparse_moe.gate",
+      "model.layers.8.block_sparse_moe.gate",
+      "model.layers.9.block_sparse_moe.gate",
+      "model.layers.10.block_sparse_moe.gate",
+      "model.layers.11.block_sparse_moe.gate",
+      "model.layers.12.block_sparse_moe.gate",
+      "model.layers.13.block_sparse_moe.gate",
+      "model.layers.14.block_sparse_moe.gate",
+      "model.layers.15.block_sparse_moe.gate",
+      "model.layers.16.block_sparse_moe.gate",
+      "model.layers.17.block_sparse_moe.gate",
+      "model.layers.18.block_sparse_moe.gate",
+      "model.layers.19.block_sparse_moe.gate",
+      "model.layers.20.block_sparse_moe.gate",
+      "model.layers.21.block_sparse_moe.gate",
+      "model.layers.22.block_sparse_moe.gate",
+      "model.layers.23.block_sparse_moe.gate",
+      "model.layers.24.block_sparse_moe.gate",
+      "model.layers.25.block_sparse_moe.gate",
+      "model.layers.26.block_sparse_moe.gate",
+      "model.layers.27.block_sparse_moe.gate",
+      "model.layers.28.block_sparse_moe.gate",
+      "model.layers.29.block_sparse_moe.gate",
+      "model.layers.30.block_sparse_moe.gate",
+      "model.layers.31.block_sparse_moe.gate",
+      "lm_head"
+    ],
+    "kv_cache_scheme": null,
+    "quant_method": "compressed-tensors",
+    "quantization_status": "compressed"
+  }
 }
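
For context: the relocated block declares tensor-wise, symmetric 8-bit float (FP8) quantization of every Linear module's weights and input activations under the compressed-tensors scheme, leaving the 32 block_sparse_moe.gate routers and lm_head unquantized. A minimal check that the renamed key is picked up, assuming a hypothetical local copy of the checkpoint in the current directory; trust_remote_code is needed here because auto_map routes to the custom PhiMoE classes:

from transformers import AutoConfig

# Load this checkpoint's config from the (assumed) local directory.
config = AutoConfig.from_pretrained(".", trust_remote_code=True)

# After this commit the scheme is exposed under quantization_config.
qc = getattr(config, "quantization_config", None)
assert qc is not None and qc["quant_method"] == "compressed-tensors"
print(qc["format"])       # float-quantized (FP8 weights and activations)
print(len(qc["ignore"]))  # 33 unquantized modules (32 gates + lm_head)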