# data_gov_ma / qa_txt.py
# tferhan's picture
# Update qa_txt.py
# 9d4d8e4 verified
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain
from langchain_community.embeddings import HuggingFaceEmbeddings
# from langchain_community.llms import HuggingFaceEndpoint
from langchain.prompts.prompt import PromptTemplate
from pathlib import Path
from langchain_core.output_parsers import StrOutputParser
from unidecode import unidecode
# import tqdm
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEndpoint
# import accelerate
# c_splitter = CharacterTextSplitter(
# chunk_size = 350,
# chunk_overlap = 4,
# separator = """,
# ]""",
# )
# def load_doc(file_path):
# loader = TextLoader(file_path)
# pages = loader.load()
# text_splitter = c_splitter
# doc_splits = text_splitter.split_documents(pages)
# return doc_splits
# Language model used to generate answers: the Mixtral-8x7B instruct model
# accessed through HuggingFaceEndpoint. Generation is kept fairly
# deterministic (temperature 0.17, top-k 30) and capped at 512 new tokens.
# NOTE(review): constructing the endpoint at import time presumably needs
# Hugging Face credentials in the environment; confirm against deployment.
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
    max_new_tokens=512,
    top_k=30,
    temperature=0.17,
)
# def process_data():
# splt = load_doc('intents_v2.txt')
# embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
# db = FAISS.from_documents(splt, embeddings)
# return db
# db = process_data()
# Raw prompt template for the website question-answering assistant.
# Placeholders: {document} receives the website information and {input}
# receives the user's question; the model continues after "AI Assistant:".
prompt = """You are an Assistant with expertise in providing answers based on provided information about a specific website. The user will present a general question related to the site, and using the available data, you should formulate an accurate and helpful answer. Your role includes web data comprehension, question interpretation, and clear communication. Remember to tailor your responses according to the context presented by the user and the details extracted from the pertinent website.
For a more detailed breakdown, consider these elements:
Role: Website Information Assistant
Skills: Web Data Comprehension, Question Interpretation, Clear Communication
Context: User presents a general question related to a particular website; you provide an accurate and helpful answer utilizing available data.
Task: Analyze user questions, understand associated web data, and construct appropriate answers.
Steps:
Acknowledge the user's question and express understanding.
Identify keywords or concepts within the question that relate to the website data.
Search through the available data to locate relevant facts or explanations.
Formulate a concise and engaging response addressing the user's query.
Validate the accuracy and relevancy of the generated answer before delivering it.
Answer Characteristics: Accurate, well-structured, easy to comprehend, directly addresses the user's question.
Here is the website informations : {document}
Human: {input}
AI Assistant:
"""
# Wrap the raw template text, declaring the two placeholders it expects.
prompt_2 = PromptTemplate(
    template=prompt,
    input_variables=["input", "document"],
)

# Pipeline: fill the prompt, pass it to the LLM, then run the model output
# through StrOutputParser.
conversation_chain = prompt_2 | llm | StrOutputParser()
# Set up a conversational chain to retrieve and generate responses.
# conversation_chain = ConversationalRetrievalChain.from_llm(
# llm=llm,
# retriever=db.as_retriever(),
# condense_question_prompt=PromptTemplate(input_variables=['input'], template=prompt),
# )