binqiangliu committed
Commit: ab440a4
1 Parent(s): 878d57a

Update app.py

Files changed (1)
  1. app.py +4 -2
app.py CHANGED
@@ -32,7 +32,8 @@ def load_quantized_model(model_name: str):
     """
     bnb_config = BitsAndBytesConfig(
         load_in_4bit=True,
-        bnb_4bit_use_double_quant=True,
+        #bnb_4bit_use_double_quant=True,
+        bnb_4bit_use_double_quant=False,
         bnb_4bit_quant_type="nf4",
         bnb_4bit_compute_dtype=torch.bfloat16
     )
@@ -40,7 +41,8 @@ def load_quantized_model(model_name: str):
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         load_in_4bit=True,
-        torch_dtype=torch.bfloat16,
+        #torch_dtype=torch.bfloat16,
+        #torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
         quantization_config=bnb_config
     )
     return model
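
For reference, a minimal sketch of what load_quantized_model looks like after this change, assuming the usual torch and transformers imports used elsewhere in app.py (the imports are not shown in the diff):

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

def load_quantized_model(model_name: str):
    """
    Load model_name as a 4-bit NF4 quantized model via bitsandbytes.
    """
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        # Double quantization disabled by this commit.
        bnb_4bit_use_double_quant=False,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16
    )

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        load_in_4bit=True,
        # The explicit torch_dtype override is commented out by this commit;
        # the compute dtype now follows bnb_4bit_compute_dtype in bnb_config.
        quantization_config=bnb_config
    )
    return model

The net effect of the commit is to turn off nested (double) quantization and drop the hard-coded bfloat16 torch_dtype, leaving a float16-on-GPU alternative commented out for later experimentation.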