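# NOTE(review): the next three lines look like hosting-page residue
# (uploader caption, commit message, commit id), not recipe content — confirm.
# mgoin's picture
# Upload folder using huggingface_hub
# 6483017 verified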
# Recipe with a single stage (`test_stage`) whose `obcq_modifiers` group
# lists three modifiers: SmoothQuantModifier, QuantizationModifier and
# SparseGPTModifier.
# NOTE(review): the nesting below was rebuilt from a copy whose indentation
# had been flattened to column 0 (which turned `weights`, `num_bits`,
# `symmetric` and `input_activations` into duplicate sibling keys); confirm
# the structure against the consuming tool's schema.
test_stage:
  obcq_modifiers:
    SmoothQuantModifier:
      smoothing_strength: 0.8
      # Each mapping is a pair: [list of layer-name regexes, one regex].
      # NOTE(review): presumably [layers to balance, layer to smooth] —
      # confirm against the modifier's documentation.
      mappings:
        - - - 're:.*q_proj'
            - 're:.*k_proj'
            - 're:.*v_proj'
          - 're:.*input_layernorm'
        - - - 're:.*gate_proj'
            - 're:.*up_proj'
          - 're:.*post_attention_layernorm'
        - - - 're:.*down_proj'
          - 're:.*up_proj'
    QuantizationModifier:
      # Three names that look like module classes, plus the mlp.down_proj
      # of layers 1, 30 and 0 (original order kept).
      ignore:
        - LlamaRotaryEmbedding
        - LlamaRMSNorm
        - SiLUActivation
        - model.layers.1.mlp.down_proj
        - model.layers.30.mlp.down_proj
        - model.layers.0.mlp.down_proj
      post_oneshot_calibration: true
      # Overrides keyed by type name; each sets 8-bit parameters for
      # weights and/or input activations.
      scheme_overrides:
        Linear:
          weights:
            num_bits: 8
            symmetric: true
            strategy: channel
        MatMulLeftInput_QK:
          input_activations:
            num_bits: 8
            symmetric: true
        Embedding:
          # Explicit null, not an omitted key.
          input_activations: null
          weights:
            num_bits: 8
            symmetric: false
    SparseGPTModifier:
      # NOTE(review): sparsity 0.0 together with `quantize: true` presumably
      # means quantization only, with no pruning — confirm.
      sparsity: 0.0
      block_size: 128
      sequential_update: false
      quantize: true
      percdamp: 0.01
      # Quoted: a digits-and-colon scalar must stay a string on every parser.
      mask_structure: '0:0'
      # model.layers.0 through model.layers.31, followed by lm_head.
      targets:
        - model.layers.0
        - model.layers.1
        - model.layers.2
        - model.layers.3
        - model.layers.4
        - model.layers.5
        - model.layers.6
        - model.layers.7
        - model.layers.8
        - model.layers.9
        - model.layers.10
        - model.layers.11
        - model.layers.12
        - model.layers.13
        - model.layers.14
        - model.layers.15
        - model.layers.16
        - model.layers.17
        - model.layers.18
        - model.layers.19
        - model.layers.20
        - model.layers.21
        - model.layers.22
        - model.layers.23
        - model.layers.24
        - model.layers.25
        - model.layers.26
        - model.layers.27
        - model.layers.28
        - model.layers.29
        - model.layers.30
        - model.layers.31
        - lm_head