binqiangliu committed
Commit: ab440a4
1 Parent(s): 878d57a

Update app.py

Files changed (1)
  1. app.py +4 -2
app.py CHANGED
@@ -32,7 +32,8 @@ def load_quantized_model(model_name: str):
     """
     bnb_config = BitsAndBytesConfig(
         load_in_4bit=True,
-        bnb_4bit_use_double_quant=True,
+        #bnb_4bit_use_double_quant=True,
+        bnb_4bit_use_double_quant=False,
         bnb_4bit_quant_type="nf4",
         bnb_4bit_compute_dtype=torch.bfloat16
     )
@@ -40,7 +41,8 @@ def load_quantized_model(model_name: str):
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         load_in_4bit=True,
-        torch_dtype=torch.bfloat16,
+        #torch_dtype=torch.bfloat16,
+        #torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
         quantization_config=bnb_config
     )
     return model
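
For reference, a minimal sketch of what load_quantized_model looks like after this change, assuming the usual torch and transformers imports used elsewhere in app.py (the imports are not shown in the diff):

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

def load_quantized_model(model_name: str):
    """
    Load model_name as a 4-bit NF4 quantized model via bitsandbytes.
    """
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        # Double quantization disabled by this commit.
        bnb_4bit_use_double_quant=False,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16
    )

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        load_in_4bit=True,
        # The explicit torch_dtype override is commented out by this commit;
        # the compute dtype now follows bnb_4bit_compute_dtype in bnb_config.
        quantization_config=bnb_config
    )
    return model

The net effect of the commit is to turn off nested (double) quantization and drop the hard-coded bfloat16 torch_dtype, leaving a float16-on-GPU alternative commented out for later experimentation.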