Spaces: Runtime error
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
import gradio as gr
import torch
import spaces

model_id = "deepapaikar/Llama_SCplusQA_10epochs"

print("Before loading model")
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)
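# Load the quantized model. A BitsAndBytesConfig belongs in quantization_config;
# passing it as config= makes from_pretrained fail at load time, which is the
# likely cause of the "Runtime error" above.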
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    return_dict=True,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token
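# ^ Llama tokenizers ship without a pad token, so EOS is reused for padding.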
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=100,      # cap the completion itself; max_length would also count prompt tokens
    return_full_text=False,  # return only the completion, not the echoed [INST] prompt
    trust_remote_code=True,
)
print("Model loaded successfully") | |
#system_message = "Answer the questions truthfully and to the point." | |
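# The `spaces` import suggests this targets ZeroGPU hardware; there, any
# function that touches the GPU must be decorated so a GPU gets attached.
@spaces.GPU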
def generate_response(query, history):
    ans = pipe(f"[INST] {query} [/INST]")
    result = ans[0]['generated_text']
    return result
demo = gr.ChatInterface(generate_response)

if __name__ == "__main__":
    demo.launch(share=True)
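Once the Space builds, a quick way to smoke-test the deployed app from outside is the `gradio_client` package. A minimal sketch, assuming a placeholder Space id (substitute the real owner/space-name) and Gradio's default `/chat` route for `ChatInterface`:

from gradio_client import Client

# Hypothetical Space id; replace with the one actually hosting this app.
client = Client("deepapaikar/Llama_SCplusQA_demo")

# gr.ChatInterface exposes its handler under api_name="/chat" by default.
answer = client.predict("Your question here", api_name="/chat")
print(answer)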