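# Gradio chat demo serving deepapaikar/Llama_SCplusQA_10epochs, loaded in 4-bit via bitsandbytes.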
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
import gradio as gr
import torch
import spaces
model_id = "deepapaikar/Llama_SCplusQA_10epochs"
print("Before loading model")
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)
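# device_map="auto" lets accelerate place the quantized layers on whatever device(s) are available.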
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    return_dict=True,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config,
)
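# Llama-family tokenizers ship without a pad token, so reuse EOS for padding.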
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token
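# return_full_text=False keeps the prompt out of the text handed back to the chat UI,
# and max_new_tokens bounds only the completion rather than prompt + completion.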
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=100,
    return_full_text=False,
    trust_remote_code=True,
)
print("Model loaded successfully")
# system_message = "Answer the questions truthfully and to the point."
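# gr.ChatInterface calls this with (message, history); the [INST] ... [/INST] wrapper is the
# Llama-2 chat prompt format this fine-tune presumably expects.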
@spaces.GPU  # on ZeroGPU Spaces this requests a GPU per call; elsewhere the `spaces` import is inert
def generate_response(query, history):
    ans = pipe(f"[INST] {query} [/INST]")
    result = ans[0]["generated_text"]
    return result
demo = gr.ChatInterface(generate_response)
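# share=True creates a public gradio.live link when run locally; Hugging Face Spaces ignores it.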
if __name__ == "__main__":
    demo.launch(share=True)