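# Gradio chat demo serving deepapaikar/Llama_SCplusQA_10epochs, loaded in 4-bit via bitsandbytes.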
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
import gradio as gr
import torch
import spaces
model_id = "deepapaikar/Llama_SCplusQA_10epochs"
print("Before loading model")
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)
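# device_map="auto" lets accelerate place the quantized layers on whatever device(s) are available.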
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    return_dict=True,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config,
)
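# Llama-family tokenizers ship without a pad token, so reuse EOS for padding.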
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token
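# return_full_text=False keeps the prompt out of the text handed back to the chat UI,
# and max_new_tokens bounds only the completion rather than prompt + completion.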
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=100,
    return_full_text=False,
    trust_remote_code=True,
)
print("Model loaded successfully")
# system_message = "Answer the questions truthfully and to the point."
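# gr.ChatInterface calls this with (message, history); the [INST] ... [/INST] wrapper is the
# Llama-2 chat prompt format this fine-tune presumably expects.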
@spaces.GPU  # on ZeroGPU Spaces this requests a GPU per call; elsewhere the `spaces` import is inert
def generate_response(query, history):
    ans = pipe(f"[INST] {query} [/INST]")
    result = ans[0]["generated_text"]
    return result
demo = gr.ChatInterface(generate_response)
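# share=True creates a public gradio.live link when run locally; Hugging Face Spaces ignores it.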
if __name__ == "__main__":
    demo.launch(share=True)