freeCS-dot-org committed on
Commit
a5e2fed
1 Parent(s): 0244d86

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -8
app.py CHANGED
@@ -33,18 +33,11 @@ h3 {
33
 
34
  device = "cuda" # for GPU usage or "cpu" for CPU usage
35
 
36
- quantization_config = BitsAndBytesConfig(
37
- load_in_4bit=True,
38
- bnb_4bit_compute_dtype=torch.bfloat16,
39
- bnb_4bit_use_double_quant=True,
40
- bnb_4bit_quant_type= "nf4")
41
-
42
  tokenizer = AutoTokenizer.from_pretrained(MODEL)
43
  model = AutoModelForCausalLM.from_pretrained(
44
  MODEL,
45
  torch_dtype=torch.bfloat16,
46
- device_map="auto",
47
- quantization_config=quantization_config)
48
 
49
  @spaces.GPU()
50
  def stream_chat(
 
33
 
34
  device = "cuda" # for GPU usage or "cpu" for CPU usage
35
 
 
 
 
 
 
 
36
  tokenizer = AutoTokenizer.from_pretrained(MODEL)
37
  model = AutoModelForCausalLM.from_pretrained(
38
  MODEL,
39
  torch_dtype=torch.bfloat16,
40
+ device_map="auto")
 
41
 
42
  @spaces.GPU()
43
  def stream_chat(