Spaces:
Sleeping
Sleeping
File size: 5,577 Bytes
71ec341 c2f0f24 7c96a82 c2f0f24 7c96a82 c2f0f24 71ec341 c2f0f24 7ae264c c2f0f24 71ec341 c2f0f24 71ec341 c2f0f24 71ec341 c2f0f24 b17ee83 c2f0f24 b4185a6 c2f0f24 71ec341 c2f0f24 7c96a82 71ec341 b17ee83 71ec341 c2f0f24 71ec341 c2f0f24 7c96a82 c2f0f24 0d747b7 c2f0f24 71ec341 6029b8b 71ec341 94e3b93 71ec341 c2f0f24 71ec341 c2f0f24 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub
import os
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import gradio as gr
import re
import shutil
import glob
def delete_files_except(folder_path):
    """Delete the ``.pdf`` files located directly inside *folder_path*.

    Despite the name, no explicit exclusion list is taken: entries whose
    name does not end in ``.pdf`` (case-sensitive) are simply left in place.
    The scan is not recursive, and ``glob('*')`` does not report dot-files.

    Args:
        folder_path: Directory whose PDF files should be removed.

    Returns:
        The base name of the last PDF removed, in sorted path order, or
        ``None`` when no PDF file was found.
    """
    deleted_name = None
    # glob returns entries in arbitrary order; sorting makes the reported
    # "last deleted" file deterministic when several PDFs are present.
    for entry in sorted(glob.glob(os.path.join(folder_path, '*'))):
        if not entry.endswith('.pdf'):
            continue
        # A directory that merely ends in ".pdf" cannot be os.remove()d.
        if os.path.isdir(entry):
            continue
        deleted_name = os.path.basename(entry)
        os.remove(entry)
    return deleted_name
# Added to the number of uploaded-document chunks to size the retriever's k.
# Presumably the chunk count of the prebuilt template index -- TODO confirm.
_TEMPLATE_CHUNK_COUNT = 8

# Appended after every answer section in the final report.
_ANSWER_SEPARATOR = "\n\n ************************************************************" + '\n' + '\n'

# Text following the "Helpful Answer:" marker (with or without a trailing
# space before the blank line), captured through to the end of the output.
_HELPFUL_ANSWER_RE = re.compile(r'Helpful Answer: ?\n\n(.*)$', re.DOTALL)

# Shared prompt for both checks; {section} is the template section under
# review and {prompt_file} the uploaded document's file name.
_CHECK_PROMPT_TEMPLATE = """<s> [INST] You have two documents:
Template Document: "Standard_MoP_template.pdf"
Content Document: "{prompt_file}"
Evaluate whether the Content Document ("{prompt_file}") effectively integrates most of the necessary context and instructions for the "{section}" listed in the Template Document "Standard_MoP_template.pdf".
Instructions:
Review the "{section}" listed in the Template Document.
Assess whether the Content Document ("{prompt_file}") incorporates the necessary context and instructions for these points, whether they are explicitly stated under a section labeled "{section}" or implicitly covered elsewhere in the document.
Provide a "Yes" if the Content Document effectively integrates most of the points listed under the "{section}", considering both explicit and implicit coverage.
If the integration is lacking or incomplete, carefully reconsider whether the points are adequately addressed within the Content Document.
If you provide a "Yes," provide a detailed explanation highlighting how the Content Document integrates the {section} effectively. Discuss both explicit mentions and any implicit coverage that contributes to their implementation.
Make sure to accurately evaluate the alignment of the "{section}" provided in the Template Document with their presence or coverage in the Content Document. [/INST] </s> """


def _build_check_prompt(section, prompt_file):
    """Return the evaluation prompt for one template section."""
    return _CHECK_PROMPT_TEMPLATE.format(section=section, prompt_file=prompt_file)


def _format_answers(results):
    """Combine raw chain outputs into the report text returned by MOP."""
    matches = [_HELPFUL_ANSWER_RE.search(text) for text in results]
    if all(match is None for match in matches):
        # No marker anywhere: hand back the raw outputs unchanged.
        return "".join(results)
    # Keep the raw output for any result lacking the marker instead of
    # dropping it, so neither check silently vanishes from the report.
    sections = [
        match.group(1) if match is not None else text
        for match, text in zip(matches, results)
    ]
    return "".join(section + _ANSWER_SEPARATOR for section in sections).strip()


def MOP(path):
    """Evaluate the PDF(s) in *path* against the stored MoP template index.

    The PDFs found in *path* are split into chunks, embedded, and merged
    into the FAISS index loaded from ``vector_database/``. A RetrievalQA
    chain is then asked two questions, one about "Pre-Check Procedures" and
    one about "Post-Check Procedures". The PDFs in *path* are deleted once
    indexing has finished or failed.

    Args:
        path: Directory containing the uploaded PDF document(s).

    Returns:
        The text after each result's "Helpful Answer:" marker, each followed
        by a row of asterisks. A result without the marker is included
        verbatim. If neither result has the marker, the two raw results are
        returned concatenated.
    """
    embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-m3")
    # NOTE(security): allow_dangerous_deserialization=True lets FAISS
    # unpickle the stored index; 'vector_database/' must only ever hold
    # trusted artifacts.
    template_vectors = FAISS.load_local(
        'vector_database/', embeddings, allow_dangerous_deserialization=True
    )

    try:
        docs = PyPDFDirectoryLoader(path).load()
        splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=300)
        content_chunks = splitter.split_documents(docs)
        content_vectors = FAISS.from_documents(content_chunks, embedding=embeddings)
        template_vectors.merge_from(content_vectors)
    finally:
        # Always clear the upload, even when loading/indexing fails, so it
        # is not picked up again by the next call's directory load.
        # prompt_file is None when the directory held no PDF.
        prompt_file = delete_files_except(path)

    llm = HuggingFaceHub(
        repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
        model_kwargs={"temperature": 0.001, "max_new_tokens": 5000},
    )
    retriever = template_vectors.as_retriever(
        search_type="similarity",
        search_kwargs={"k": _TEMPLATE_CHUNK_COUNT + len(content_chunks)},
    )
    qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

    results = [
        qa.run(_build_check_prompt(section, prompt_file))
        for section in ("Pre-Check Procedures", "Post-Check Procedures")
    ]
    return _format_answers(results)
def process_file(fileobj):
    """Copy an uploaded file into the ``data`` folder and run MOP on it.

    Args:
        fileobj: The upload handed over by Gradio. Depending on the Gradio
            version this appears to be either a path string or a temp-file
            object exposing the path as ``.name``; both are accepted.

    Returns:
        The report text produced by ``MOP`` for the ``data`` folder.
    """
    destination_folder = 'data'
    # Resolve the source path once so basename() and copyfile() agree,
    # whichever form the upload arrives in.
    source_path = getattr(fileobj, 'name', fileobj)
    file_name = os.path.basename(source_path)
    # The working folder may not exist on a fresh deployment.
    os.makedirs(destination_folder, exist_ok=True)
    destination_path = os.path.join(destination_folder, file_name)
    shutil.copyfile(source_path, destination_path)
    print(file_name)
    return MOP(destination_folder)
# Gradio UI: a single file-upload input routed through process_file, with the
# returned report rendered as plain text.
demo = gr.Interface(
    fn=process_file,
    inputs=[
        "file",
    ],
    outputs="text",
)
demo.launch()