truongghieu committed on
Commit
f308f42
1 Parent(s): 82303ca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -2
app.py CHANGED
@@ -1,13 +1,18 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
3
 
4
  import torch
5
 
6
  # Check if a GPU is available
7
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
8
 
 
 
 
 
 
9
  tokenizer = AutoTokenizer.from_pretrained("truongghieu/deci-finetuned", trust_remote_code=True)
10
- model = AutoModelForCausalLM.from_pretrained("truongghieu/deci-finetuned", trust_remote_code=True,low_cpu_mem_usage=True)
11
 
12
  # Move the model to the GPU if available
13
  model.to(device)
@@ -22,6 +27,8 @@ generation_config = GenerationConfig(
22
  pad_token_id=tokenizer.eos_token_id
23
  )
24
 
 
 
25
  # Define a function that takes a text input and generates a text output
26
  def generate_text(text):
27
  input_text = text
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig,BitsAndBytesConfig
3
 
4
  import torch
5
 
6
  # Check if a GPU is available
7
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
8
 
9
+ bnb_config = BitsAndBytesConfig(
10
+ load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype="float16", bnb_4bit_use_double_quant=True
11
+ )
12
+
13
+
14
  tokenizer = AutoTokenizer.from_pretrained("truongghieu/deci-finetuned", trust_remote_code=True)
15
+ model = AutoModelForCausalLM.from_pretrained("truongghieu/deci-finetuned", trust_remote_code=True, quantization_config=bnb_config)
16
 
17
  # Move the model to the GPU if available
18
  model.to(device)
 
27
  pad_token_id=tokenizer.eos_token_id
28
  )
29
 
30
+
31
+
32
  # Define a function that takes a text input and generates a text output
33
  def generate_text(text):
34
  input_text = text