{
  "bits": 4,
  "dataset": "wikitext2",
  "group_size": 128,
  "damp_percent": 0.1,
  "desc_act": false,
  "sym": true,
  "true_sequential": true,
  "quant_method": "gptq",
  "modules_in_block_to_quantize": [
    [
      "self_attn.k_proj",
      "self_attn.v_proj",
      "self_attn.q_proj"
    ],
    [
      "self_attn.o_proj"
    ],
    [
      "block_sparse_moe.experts.0.w1",
      "block_sparse_moe.experts.1.w1",
      "block_sparse_moe.experts.2.w1",
      "block_sparse_moe.experts.3.w1",
      "block_sparse_moe.experts.4.w1",
      "block_sparse_moe.experts.5.w1",
      "block_sparse_moe.experts.6.w1",
      "block_sparse_moe.experts.7.w1",
      "block_sparse_moe.experts.0.w3",
      "block_sparse_moe.experts.1.w3",
      "block_sparse_moe.experts.2.w3",
      "block_sparse_moe.experts.3.w3",
      "block_sparse_moe.experts.4.w3",
      "block_sparse_moe.experts.5.w3",
      "block_sparse_moe.experts.6.w3",
      "block_sparse_moe.experts.7.w3"
    ],
    [
      "block_sparse_moe.experts.0.w2",
      "block_sparse_moe.experts.1.w2",
      "block_sparse_moe.experts.2.w2",
      "block_sparse_moe.experts.3.w2",
      "block_sparse_moe.experts.4.w2",
      "block_sparse_moe.experts.5.w2",
      "block_sparse_moe.experts.6.w2",
      "block_sparse_moe.experts.7.w2"
    ]
  ]
}