from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

MODEL_PATH = "DeepSeek-V3-1B-Test"
QUANT_PATH = "DeepSeek-V3-1B-Test-AWQ"
QUANT_CONFIG = {
    "zero_point": True,
    "q_group_size": 128,
    "w_bit": 4,
    "version": "GEMM",
    # DeepSeek MLA: leave the compressed KV projection unquantized.
    "modules_to_not_convert": ["self_attn.kv_a_proj_with_mqa"],
}


def main():
    # Load the full-precision model on CPU; disable the KV cache for quantization.
    model = AutoAWQForCausalLM.from_pretrained(MODEL_PATH, low_cpu_mem_usage=True, use_cache=False)
    # legacy=True keeps the original SentencePiece tokenization behavior.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, legacy=True)

    # Run AWQ calibration and quantize the weights to 4 bits.
    model.quantize(
        tokenizer,
        quant_config=QUANT_CONFIG,
    )

    # Write the quantized weights and the tokenizer to the output directory.
    model.save_quantized(QUANT_PATH)
    tokenizer.save_pretrained(QUANT_PATH)
    print(f"Model is quantized and saved at \"{QUANT_PATH}\".")


if __name__ == "__main__":
    main()