dinhdat1110 committed on
Commit
cdc36a5
1 Parent(s): 70766ea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -2
app.py CHANGED
@@ -2,7 +2,12 @@ from threading import Thread
2
  import gradio as gr
3
  import transformers
4
  import torch
5
- from transformers import pipeline, AutoTokenizer, TextIteratorStreamer
 
 
 
 
 
6
 
7
 
8
  def chat_history(history) -> str:
@@ -28,7 +33,11 @@ def model_loading_pipeline():
28
  model=model_id,
29
  model_kwargs={
30
  "torch_dtype": torch.float16,
31
- "load_in_8bits": True,
 
 
 
 
32
  },
33
  streamer=streamer,
34
  )
 
2
  import gradio as gr
3
  import transformers
4
  import torch
5
+ from transformers import (
6
+ pipeline,
7
+ AutoTokenizer,
8
+ TextIteratorStreamer,
9
+ BitsAndBytesConfig
10
+ )
11
 
12
 
13
  def chat_history(history) -> str:
 
33
  model=model_id,
34
  model_kwargs={
35
  "torch_dtype": torch.float16,
36
+ "load_in_4bits": True,
37
+ "quantization_config": BitsAndBytesConfig(
38
+ load_in_4bit=True,
39
+ bnb_4bit_compute_dtype=torch.float16
40
+ ),
41
  },
42
  streamer=streamer,
43
  )