# Quantize WizardLM-7B-Uncensored to 8-bit with AutoGPTQ and save the result as safetensors.
from transformers import AutoTokenizer, TextGenerationPipeline
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
import logging

# Paths: the unquantized source model and the directory where the quantized weights are written.
pretrained_model_dir: str = "models/WizardLM-7B-Uncensored"
quantized_model_dir: str = "./"

# GPTQ settings: 8-bit weights, group size 128, sequential layer-by-layer quantization;
# the output file is named after model_file_base_name.
config: dict = dict(
    quantize_config=dict(model_file_base_name='WizardLM-7B-Uncensored',
                         bits=8, desc_act=False, group_size=128, true_sequential=True),
    use_safetensors=True,
)

logging.basicConfig(
    format="%(asctime)s %(levelname)s [%(name)s] %(message)s", level=logging.INFO, datefmt="%Y-%m-%d %H:%M:%S"
)

tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)
# Calibration data for GPTQ; a single short prompt keeps the example minimal,
# but more (and longer) examples generally give better quantization quality.
examples: list[dict[str, list[int]]] = [tokenizer("It was a cold night")]

# Load the full-precision model, run GPTQ quantization over the calibration
# examples, then write the quantized weights (plus quantize_config.json) to disk.
model = AutoGPTQForCausalLM.from_pretrained(pretrained_model_dir, BaseQuantizeConfig(**config['quantize_config']))
model.quantize(examples)

model.save_quantized(quantized_model_dir, use_safetensors=config['use_safetensors'])
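
# Minimal sketch of loading the quantized model back for generation, following
# the standard AutoGPTQ from_quantized / TextGenerationPipeline usage.
# Assumes a CUDA device is available; adjust device and model_basename to your setup.
model = AutoGPTQForCausalLM.from_quantized(
    quantized_model_dir,
    model_basename='WizardLM-7B-Uncensored',
    use_safetensors=True,
    device="cuda:0",
)
pipeline = TextGenerationPipeline(model=model, tokenizer=tokenizer)
print(pipeline("It was a cold night")[0]["generated_text"])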