MoP / app.py
Ravnoor1's picture
Better Solution
e991ae0 verified
raw
history blame
No virus
5.61 kB
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub
import os
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import gradio as gr
import re
import shutil
import glob
def delete_files_except(folder_path):
    """Delete one regular file from *folder_path* and return its base name.

    Despite the name, no file is excluded: the first regular file (in sorted
    order) found directly inside the folder is removed.

    Args:
        folder_path: Directory to look in.

    Returns:
        The base name of the deleted file, or ``None`` when the folder holds
        no regular files.
    """
    # glob yields entries in arbitrary order, so sort to make the choice
    # deterministic, and skip sub-directories, which os.remove cannot delete.
    candidates = sorted(
        entry
        for entry in glob.glob(os.path.join(folder_path, '*'))
        if os.path.isfile(entry)
    )
    if not candidates:
        return None

    target = candidates[0]
    os.remove(target)
    return os.path.basename(target)
# Location of the pre-built FAISS index holding the template document.
_TEMPLATE_INDEX_PATH = '/content/drive/MyDrive/Innovation Themes /MoP/Template_embeddings'

# Added to the uploaded document's chunk count to size the retriever's ``k``.
# Presumably the number of chunks stored in the template index, so that every
# chunk of both documents can be retrieved -- TODO confirm.
_TEMPLATE_CHUNK_COUNT = 8

# Matches the answer that follows the "Helpful Answer:" marker, with or
# without a single trailing space before the blank line.
_HELPFUL_ANSWER_RE = re.compile(r'Helpful Answer: ?\n\n(.*)$', re.DOTALL)

# Appended after every answer in the combined report.
_ANSWER_SEPARATOR = "\n\n ************************************************************"


def _build_check_prompt(section, content_name):
    """Return the instruction prompt asking whether *content_name* covers *section*."""
    return f"""<s> [INST] You have two documents:
Template Document: "Standard_MoP_template.pdf"
Content Document: "{content_name}"
Evaluate whether the Content Document ("{content_name}") effectively integrates most of the necessary context and instructions for the "{section}" listed in the Template Document "Standard_MoP_template.pdf".
Instructions:
Review the "{section}" listed in the Template Document.
Assess whether the Content Document ("{content_name}") incorporates the necessary context and instructions for these points, whether they are explicitly stated under a section labeled "{section}" or implicitly covered elsewhere in the document.
Provide a "Yes" if the Content Document effectively integrates most of the points listed under the "{section}", considering both explicit and implicit coverage.
If the integration is lacking or incomplete, carefully reconsider whether the points are adequately addressed within the Content Document.
If you provide a "Yes," provide a detailed explanation highlighting how the Content Document integrates the {section} effectively. Discuss both explicit mentions and any implicit coverage that contributes to their implementation.
Make sure to accurately evaluate the alignment of the "{section}" provided in the Template Document with their presence or coverage in the Content Document. [/INST] </s>"""


def _extract_helpful_answer(result):
    """Return the text after the "Helpful Answer:" marker, or ``None`` if absent."""
    match = _HELPFUL_ANSWER_RE.search(result)
    return match.group(1) if match else None


def MOP(path):
    """Evaluate the PDF(s) in *path* against the stored MoP template.

    The PDFs are loaded, chunked, embedded and merged into the template FAISS
    index. One file is then removed from *path* via ``delete_files_except``
    and its name is used in the prompts. The LLM is asked two questions, one
    about "Pre-Check Procedures" and one about "Post-Check Procedures".

    Args:
        path: Directory containing the uploaded PDF document.

    Returns:
        The answers following the "Helpful Answer:" marker, each followed by
        a row of asterisks. A result without the marker is included in full.
        If neither result contains the marker, the two raw results are
        returned concatenated.

    Raises:
        ValueError: If no text chunks could be produced from *path*.
    """
    embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-m3")
    # NOTE(security): allow_dangerous_deserialization unpickles the stored
    # index; this is only acceptable because the index file is our own.
    template_vectors = FAISS.load_local(
        _TEMPLATE_INDEX_PATH,
        embeddings,
        allow_dangerous_deserialization=True,
    )

    pages = PyPDFDirectoryLoader(path).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=300)
    document_chunks = splitter.split_documents(pages)
    if not document_chunks:
        # Fail early with a clear message instead of an obscure error from
        # the indexing step when the folder holds no readable PDF text.
        raise ValueError(f"No PDF content could be loaded from {path!r}")

    document_vectors = FAISS.from_documents(document_chunks, embedding=embeddings)
    template_vectors.merge_from(document_vectors)

    # The upload is no longer needed once embedded; its name goes in the prompts.
    prompt_file = delete_files_except(path)

    llm = HuggingFaceHub(
        repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
        model_kwargs={"temperature": 0.001, "max_new_tokens": 5000},
    )
    retriever = template_vectors.as_retriever(
        search_type="similarity",
        search_kwargs={"k": _TEMPLATE_CHUNK_COUNT + len(document_chunks)},
    )
    qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

    result1 = qa.run(_build_check_prompt("Pre-Check Procedures", prompt_file))
    result2 = qa.run(_build_check_prompt("Post-Check Procedures", prompt_file))

    raw_results = (result1, result2)
    extracted = [_extract_helpful_answer(raw) for raw in raw_results]
    if all(text is None for text in extracted):
        return result1 + result2

    # Keep the raw text of any result lacking the marker, so that neither
    # evaluation is silently dropped from the report.
    sections = [
        raw if text is None else text
        for raw, text in zip(raw_results, extracted)
    ]
    return "".join(section + _ANSWER_SEPARATOR for section in sections).strip()
def process_file(fileobj):
    """Copy an uploaded file into the working folder and run the MoP check.

    Args:
        fileobj: The upload as received from the Gradio "file" input. Either
            a filesystem path or an object exposing that path as ``.name``
            (which form is passed presumably depends on the Gradio version).

    Returns:
        The text produced by ``MOP`` for the working folder.

    Raises:
        ValueError: If no file was supplied.
    """
    if fileobj is None:
        raise ValueError("No file was uploaded")

    destination_folder = "/content/check/"
    # Use .name when present, otherwise treat the value itself as the path.
    source_path = getattr(fileobj, "name", fileobj)

    # The folder may not exist yet on a fresh machine.
    os.makedirs(destination_folder, exist_ok=True)
    destination_path = os.path.join(destination_folder, os.path.basename(source_path))
    shutil.copyfile(source_path, destination_path)
    return MOP(destination_folder)
# Gradio UI: a single file upload goes in, the evaluation text comes out.
demo = gr.Interface(fn=process_file, inputs=["file"], outputs="text")
demo.launch()