File size: 3,387 Bytes
f28182b
267b2e4
f28182b
 
db101f6
f28182b
 
a9899cb
f28182b
9d4d8e4
d5f8b4a
db101f6
9d4d8e4
f28182b
a7c6c77
267b2e4
a7c6c77
 
 
 
267b2e4
a7c6c77
267b2e4
a7c6c77
 
 
 
 
 
f28182b
 
 
 
 
d704cac
f28182b
4f18e4f
f28182b
 
a7c6c77
 
 
 
 
f28182b
a7c6c77
f28182b
 
fc767c3
f28182b
fc767c3
f28182b
fc767c3
f28182b
fc767c3
f28182b
fc767c3
f28182b
fc767c3
 
 
 
 
 
 
 
 
 
 
f28182b
 
 
 
3b929bd
a9899cb
f28182b
a9899cb
 
 
 
 
f28182b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain
from langchain_community.embeddings import HuggingFaceEmbeddings
# from langchain_community.llms import HuggingFaceEndpoint
from langchain.prompts.prompt import PromptTemplate
from pathlib import Path
from langchain_core.output_parsers import StrOutputParser
from unidecode import unidecode
# import tqdm
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEndpoint
# import accelerate

# c_splitter = CharacterTextSplitter(
    
#     chunk_size = 350,
#     chunk_overlap = 4,
#     separator = """,
#       ]""",

# )

# def load_doc(file_path):
#     loader = TextLoader(file_path)
#     pages = loader.load()
#     text_splitter = c_splitter
#     doc_splits = text_splitter.split_documents(pages)
#     return doc_splits



# Hosted inference endpoint used by the conversation chain further down.
# A low temperature keeps answers grounded in the supplied website data;
# top_k / max_new_tokens bound the sampling pool and response length.
llm = HuggingFaceEndpoint(
    repo_id='mistralai/Mixtral-8x7B-Instruct-v0.1',
    temperature=0.17,
    max_new_tokens=512,
    top_k=30,
)

# def process_data():
#   splt = load_doc('intents_v2.txt')
#   embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
#   db = FAISS.from_documents(splt, embeddings)
#   return db

# db = process_data()


# System prompt for the website Q&A assistant. It expects two template
# variables filled in by PromptTemplate: {document} (retrieved website
# data) and {input} (the user's question).
# Fixes vs. previous revision: removed a stray unmatched closing quote
# after the first paragraph and corrected "informations" -> "information"
# (both appeared verbatim in the text sent to the model).
prompt = """You are an Assistant with expertise in providing answers based on provided information about a specific website. The user will present a general question related to the site, and using the available data, you should formulate an accurate and helpful answer. Your role includes web data comprehension, question interpretation, and clear communication. Remember to tailor your responses according to the context presented by the user and the details extracted from the pertinent website.

For a more detailed breakdown, consider these elements:

Role: Website Information Assistant

Skills: Web Data Comprehension, Question Interpretation, Clear Communication

Context: User presents a general question related to a particular website; you provide an accurate and helpful answer utilizing available data.

Task: Analyze user questions, understand associated web data, and construct appropriate answers.

Steps:

Acknowledge the user's question and express understanding.
Identify keywords or concepts within the question that relate to the website data.
Search through the available data to locate relevant facts or explanations.
Formulate a concise and engaging response addressing the user's query.
Validate the accuracy and relevancy of the generated answer before delivering it.
Answer Characteristics: Accurate, well-structured, easy to comprehend, directly addresses the user's question.
 Here is the website information : {document}
        Human: {input}
        AI Assistant:
"""

# Bind the system prompt to its two template variables, then compose the
# runnable pipeline: template -> LLM endpoint -> plain-string output.
prompt_2 = PromptTemplate(template=prompt, input_variables=['input', 'document'])
conversation_chain = prompt_2 | llm | StrOutputParser()
# Alternative: a retrieval-based conversational chain over the FAISS index (currently disabled below).
# conversation_chain = ConversationalRetrievalChain.from_llm(
#             llm=llm,
#             retriever=db.as_retriever(),
#             condense_question_prompt=PromptTemplate(input_variables=['input'], template=prompt),
#         )