# File-viewer header captured with the source (not code): mobinln's picture | v1 | 3af157b | raw | history blame | 1.76 kB
import streamlit as st
import pathlib
from huggingface_hub import hf_hub_download
from langchain_community.llms import LlamaCpp
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
@st.cache_resource()
def load_llm(repo_id, filename):
    """Download a model file from the Hugging Face Hub and wrap it in LlamaCpp.

    The file is fetched with ``hf_hub_download`` into a local ``models``
    directory (created on demand) and the resulting path is handed to
    ``LlamaCpp``. The function is decorated with ``st.cache_resource`` so
    the loaded object can be reused instead of rebuilt on each call.

    Args:
        repo_id: Hub repository identifier that hosts the model file.
        filename: Name of the model file inside that repository.

    Returns:
        The constructed ``LlamaCpp`` instance.
    """
    # Make sure the download target exists before asking the hub for the file.
    download_dir = pathlib.Path("models")
    download_dir.mkdir(exist_ok=True)

    local_model_path = hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        local_dir=download_dir,
    )

    # Options forwarded verbatim to LlamaCpp; repo_id and filename are passed
    # through as well, exactly as the constructor call has always done.
    llama_options = {
        "repo_id": repo_id,
        "filename": filename,
        "verbose": False,
        "use_mmap": True,
        "use_mlock": True,
        "n_threads": 4,
        "n_threads_batch": 4,
        "n_ctx": 8000,
    }
    model = LlamaCpp(model_path=local_model_path, **llama_options)

    print(f"{repo_id} loaded successfully. ✅")
    return model
# Retrieval-augmented answer for a single question (one blocking call, no streaming).
def response_generator(llm, messages, question, retriever):
    """Answer ``question`` with a retrieval chain built from ``llm`` and ``retriever``.

    A system prompt containing a ``{context}`` placeholder and a user turn
    containing ``{input}`` are combined into a chat prompt. That prompt and
    ``llm`` form a stuff-documents chain, which is wrapped with ``retriever``
    into a retrieval chain and invoked once with the question.

    Args:
        llm: Language model passed to ``create_stuff_documents_chain``.
        messages: Not read by this function; kept in the signature for callers.
        question: Text supplied to the chain under the ``"input"`` key.
        retriever: Retriever passed to ``create_retrieval_chain``.

    Returns:
        Whatever ``invoke`` on the retrieval chain returns, unmodified.
        NOTE(review): per LangChain docs this is presumably a dict with
        ``input``, ``context`` and ``answer`` keys -- confirm against callers.
    """
    system_instructions = (
        "You are an assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer the question. "
        "If you don't know the answer, say that you don't know. "
        "Use three sentences maximum and keep the answer concise."
        "\n\n{context}"
    )
    chat_turns = [
        ("system", system_instructions),
        ("user", "{input}"),
    ]
    qa_prompt = ChatPromptTemplate.from_messages(chat_turns)

    # Inner chain stuffs the retrieved documents into {context}; the outer
    # chain runs the retriever first and then feeds the inner chain.
    stuff_chain = create_stuff_documents_chain(llm, qa_prompt)
    retrieval_chain = create_retrieval_chain(retriever, stuff_chain)

    return retrieval_chain.invoke({"input": question})