Spaces:

md-vasim
/

llama-2-gguf

Sleeping

App Files Files Community

llama-2-gguf / gradio_app.py

md-vasim

gradio_app added

c32b415 9 months ago

raw

history blame contribute delete

1.92 kB

	from langchain.callbacks.manager import CallbackManager
	from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
	from langchain.chains import LLMChain
	from langchain.prompts import PromptTemplate
	from langchain_community.llms import LlamaCpp
	import gradio as gr
	import os

	os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

	REPO = "TheBloke/Llama-2-7B-Chat-GGUF"
	MODEL_NAME = "llama-2-7b-chat.Q5_K_M.gguf"

	DOWNLOAD_MODEL = f"huggingface-cli download {REPO} {MODEL_NAME} --local-dir . --local-dir-use-symlinks False"

	MODEL_PATH = "llama-2-7b-chat.Q5_K_M.gguf"

	if not os.path.exists(MODEL_PATH):
	os.system(DOWNLOAD_MODEL)

	TEMPLATE = """

	You are a helpful AI Assistant created by Mohammed Vasim. Mohammed Vasim is an AI Engineer.

	Question: {question}

	Answer: helpful answer"""

	prompt = PromptTemplate.from_template(TEMPLATE)

	# Callbacks support token-wise streaming
	callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

	# Make sure the model path is correct for your system!
	llm = LlamaCpp(
	model_path=MODEL_PATH,
	temperature=0.75,
	max_tokens=2000,
	top_p=1,
	callback_manager=callback_manager,
	verbose=True, # Verbose is required to pass to the callback manager
	)

	llm_chain = LLMChain(prompt=prompt, llm=llm)

	# question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"
	# llm_chain.run(question)

	title = "Welcome to Open Source LLM"

	description = "This is a Llama-2-GGUF"

	def answer_query(message, history):
	print(message)
	message = llm_chain.invoke(message)
	print(message, history)
	return message

	# Gradio chat interface
	gr.ChatInterface(
	fn=answer_query,
	title=title,
	description=description,
	examples=[
	["What is a Large Language Model?"],
	["What's 9+2-1?"],
	["Write Python code to print the Fibonacci sequence"]
	]
	).queue().launch(server_name="0.0.0.0")