shenoy committed on
Commit
8fd1ef9
1 Parent(s): 2f00d01

Add application file and dependencies

Browse files
Files changed (2) hide show
  1. app.py +39 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Gradio demo: dialogue summarisation with a QLoRA-tuned Llama-2-7B.

Loads the 4-bit quantized base model, attaches the
``shenoy/DialogSumLlama2_qlora`` adapter and serves a single text-in /
text-out Gradio interface.
"""
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
# BitsAndBytesConfig was used below without being imported (NameError at startup).
from transformers import BitsAndBytesConfig
import transformers
from peft import PeftModel

# Quantization config: 4-bit NF4 weights, float16 compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
)

model_name = "TinyPixel/Llama-2-7B-bf16-sharded"

# Load the base model with the quantization config; device_map='auto' lets
# accelerate decide where the weights are placed.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    trust_remote_code=True,
    device_map='auto',
)

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
    return_token_type_ids=False,
)
# No pad token is set on the tokenizer here, so reuse EOS for padding.
tokenizer.pad_token = tokenizer.eos_token

# Wrap the quantized base model with the fine-tuned LoRA adapter.
model = PeftModel.from_pretrained(
    model,
    "shenoy/DialogSumLlama2_qlora",
    device_map="auto",
)

# Gradio fields. The legacy gr.inputs / gr.outputs namespaces are deprecated
# (and removed in Gradio 4); gr.Textbox serves as both input and output.
input_text = gr.Textbox(label="Input Text", type="text")
output_text = gr.Textbox(label="Output Text", type="text")


def predict(text):
    """Generate a continuation for *text* and return the decoded string.

    Args:
        text: Prompt entered in the input textbox.

    Returns:
        The first generated sequence decoded with special tokens stripped,
        exactly as returned by ``model.generate`` (not post-processed).
    """
    # Move the encoded tensors to the model's device; the tokenizer returns
    # CPU tensors while device_map='auto' may have placed the model on GPU.
    inputs = tokenizer(text, return_tensors="pt").to(model.device)
    outputs = model.generate(
        input_ids=inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_new_tokens=100,
        repetition_penalty=1.2,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# Gradio interface
interface = gr.Interface(fn=predict, inputs=input_text, outputs=output_text)
interface.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ accelerate==0.21.0
2
+ peft==0.4.0
3
+ bitsandbytes==0.40.2
4
+ transformers==4.30.2
5
+ trl==0.4.7