asoria HF staff committed on
Commit
f895102
·
1 Parent(s): 13f17e5

Try to run without quantization

Browse files
Files changed (1) hide show
  1. app.py +8 -8
app.py CHANGED
@@ -16,7 +16,7 @@ import numpy as np
16
  from torch import cuda
17
  from torch import bfloat16
18
  from transformers import (
19
- BitsAndBytesConfig,
20
  AutoTokenizer,
21
  AutoModelForCausalLM,
22
  pipeline,
@@ -44,12 +44,12 @@ model_id = "meta-llama/Llama-2-7b-chat-hf"
44
  device = f"cuda:{cuda.current_device()}" if cuda.is_available() else "cpu"
45
  logging.info(device)
46
 
47
- bnb_config = BitsAndBytesConfig(
48
- load_in_4bit=True, # 4-bit quantization
49
- bnb_4bit_quant_type="nf4", # Normalized float 4
50
- bnb_4bit_use_double_quant=True, # Second quantization after the first
51
- bnb_4bit_compute_dtype=bfloat16, # Computation type
52
- )
53
 
54
  tokenizer = AutoTokenizer.from_pretrained(model_id)
55
 
@@ -57,7 +57,7 @@ tokenizer = AutoTokenizer.from_pretrained(model_id)
57
  model = AutoModelForCausalLM.from_pretrained(
58
  model_id,
59
  trust_remote_code=True,
60
- quantization_config=bnb_config,
61
  device_map="auto",
62
  )
63
 
 
16
  from torch import cuda
17
  from torch import bfloat16
18
  from transformers import (
19
+ # BitsAndBytesConfig,
20
  AutoTokenizer,
21
  AutoModelForCausalLM,
22
  pipeline,
 
44
  device = f"cuda:{cuda.current_device()}" if cuda.is_available() else "cpu"
45
  logging.info(device)
46
 
47
+ # bnb_config = BitsAndBytesConfig(
48
+ # load_in_4bit=True, # 4-bit quantization
49
+ # bnb_4bit_quant_type="nf4", # Normalized float 4
50
+ # bnb_4bit_use_double_quant=True, # Second quantization after the first
51
+ # bnb_4bit_compute_dtype=bfloat16, # Computation type
52
+ # )
53
 
54
  tokenizer = AutoTokenizer.from_pretrained(model_id)
55
 
 
57
  model = AutoModelForCausalLM.from_pretrained(
58
  model_id,
59
  trust_remote_code=True,
60
+ # quantization_config=bnb_config,
61
  device_map="auto",
62
  )
63