# vz-mop / app_wip.py
# vedsadani's picture
# Create app_wip.py
# 4ea3ce6 verified
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.llms import HuggingFaceHub
from getpass import getpass
import os
from langchain.embeddings import HuggingFaceEmbeddings
from google.colab import drive
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.memory import ConversationSummaryBufferMemory
import io
import contextlib
import pandas as pd
from PyPDF2 import PdfReader
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.document_loaders import PyPDFLoader
import gradio as gr
def process_file(fileobj):
    """Copy an uploaded file into the ``Docs`` folder and run the MoP check.

    Args:
        fileobj: The upload handed over by the Gradio ``"file"`` input. Either
            a path string or an object exposing its path as ``.name`` is
            accepted.

    Returns:
        Whatever ``MOP`` returns for the ``Docs`` folder.

    Raises:
        ValueError: If no file was uploaded (``fileobj`` is ``None``).
    """
    # Imported locally: the module's import block does not bring in shutil.
    import shutil

    if fileobj is None:
        raise ValueError("No file was uploaded.")

    # Resolve the source path once so the basename and the copy agree,
    # whether the upload arrives as a plain path or as a file-like wrapper.
    source_path = getattr(fileobj, "name", fileobj)

    destination_folder = "Docs"
    # copyfile does not create missing parent directories.
    os.makedirs(destination_folder, exist_ok=True)
    destination_path = os.path.join(
        destination_folder, os.path.basename(source_path)
    )
    shutil.copyfile(source_path, destination_path)
    return MOP(destination_folder)
def MOP(path):
    """Ask an LLM whether an uploaded MoP covers the template's pre/post checks.

    Every PDF found under ``path`` is loaded, split into overlapping chunks,
    embedded with ``thenlper/gte-base`` and indexed in FAISS. A "stuff"
    RetrievalQA chain backed by Mixtral-8x7B-Instruct is then asked to compare
    the content document against "Dummy_standard MoP_template_new.pdf".

    Args:
        path: Directory containing the template PDF and the uploaded PDF.

    Returns:
        The text following "Helpful Answer:" in the chain output when that
        marker is present, otherwise the raw chain output.

    Raises:
        ValueError: If no text chunks could be produced from ``path``.
    """
    # Imported locally: the module's import block provides neither name.
    import re

    from langchain.chains import RetrievalQA

    loader = PyPDFDirectoryLoader(path)
    docs = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=500)
    text_chunks = text_splitter.split_documents(docs)
    if not text_chunks:
        # Fail early with a readable message instead of an obscure error
        # from index construction on an empty document list.
        raise ValueError(f"No PDF text could be loaded from {path!r}")

    embeddings = HuggingFaceEmbeddings(model_name="thenlper/gte-base")
    vector_store = FAISS.from_documents(text_chunks, embedding=embeddings)

    filename_to_keep = 'Dummy_standard MoP_template_new.pdf'
    # NOTE(review): delete_files_except is not defined in the visible part of
    # this file. Presumably it removes everything but the template from `path`
    # and returns the name of the uploaded document -- confirm.
    prompt_file = delete_files_except(filename_to_keep, path)

    repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    llm = HuggingFaceHub(
        repo_id=repo_id,
        model_kwargs={"temperature": 0.1, "max_new_tokens": 2048},
    )
    # k equals the chunk count, so every indexed chunk is handed to the chain.
    retriever = vector_store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": len(text_chunks)},
    )
    qa = RetrievalQA.from_chain_type(
        llm=llm, chain_type="stuff", retriever=retriever, verbose=True
    )
    message= f"""<s> [INST] You have two documents:
Template Document: "Dummy_standard MoP_template_new.pdf"
Content Document: "{prompt_file}"
Your task is to determine whether the Content Document effectively incorporates the context and instructions of the Pre-Check Procedures and Post-Check Procedures specified in the Template Document, without the requirement for them to be under specified labels. The focus should be on the presence of the context and instructions rather than their exact placement.
Provide a "Yes" or "No" response indicating whether the Content Document accurately integrates the context and instructions of the Pre-Check Procedures and Post-Check Procedures as outlined in the Template Document.
Additionally, identify any missing elements related to the context and instructions of the Pre-Check Procedures and Post-Check Procedures if present, regardless of their placement within the Content Document.
Instructions:
Review the context and instructions of the Pre-Check Procedures and Post-Check Procedures detailed in the Template Document ("Dummy_standard MoP_template_new.pdf").
Assess whether the Content Document ("{prompt_file}") includes the necessary context and instructions for the Pre-Check Procedures and Post-Check Procedures, regardless of their specific placement or labeling.
Provide a "Yes" if the Content Document adequately integrates the context and instructions of the Pre-Check Procedures and Post-Check Procedures, or "No" if there are significant gaps or omissions.
If the answer is "No," specify any missing elements related to the context and instructions of the Pre-Check Procedures and Post-Check Procedures, emphasizing their importance in the Content Document.
Ensure careful consideration of the context and instructions provided in the Template Document ("Dummy_standard MoP_template_new.pdf") while evaluating the alignment of the Content Document ("{prompt_file}"). [/INST] </s>"""
    result = qa.run(message)

    # The chain output includes text before the answer; keep only what
    # follows the "Helpful Answer:" marker when it is there.
    pattern = r"Helpful Answer:\n\n(.*)"
    match = re.search(pattern, result, re.DOTALL)
    if match:
        return match.group(1)
    return result
# Gradio UI: one file-upload input routed through process_file, with the
# returned answer rendered as plain text.
demo = gr.Interface(fn=process_file, inputs=["file"], outputs="text")

# Start the web server for the interface.
demo.launch()