Spaces:

ShivanshMathur007
/

MoP

Sleeping

File size: 5,577 Bytes

71ec341
 
 
 
 
 
 
 
 
 
 
c2f0f24
 
 
7c96a82
 
 
 
 
 
c2f0f24
7c96a82
c2f0f24
 
71ec341
c2f0f24
7ae264c
c2f0f24
 
71ec341
 
 
c2f0f24
 
 
 
 
71ec341
c2f0f24
 
 
 
71ec341
c2f0f24
b17ee83
c2f0f24
 
b4185a6
c2f0f24
 
 
71ec341
c2f0f24
 
 
 
 
7c96a82
71ec341
b17ee83
71ec341
c2f0f24
 
 
71ec341
 
 
c2f0f24
 
 
 
 
7c96a82
c2f0f24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0d747b7
c2f0f24
 
 
 
 
 
 
 
 
71ec341
6029b8b
71ec341
 
 
94e3b93
 
 
71ec341
c2f0f24
71ec341
 
 
 
 
 
 
c2f0f24

from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub
import os
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import gradio as gr
import re
import shutil
import glob
def delete_files_except(folder_path):
  """Remove every ``*.pdf`` entry directly inside *folder_path*.

  Despite the name, no exclusion list is taken: entries whose path does not
  end in ``.pdf`` (the match is case-sensitive) are simply left in place.

  Returns the base name of the last PDF that was removed, or ``None`` when
  the folder held no PDF at all.
  """
  # Non-recursive listing; only entries whose path ends in ".pdf" are kept.
  pdf_paths = [
    entry
    for entry in glob.glob(os.path.join(folder_path, '*'))
    if entry.endswith('.pdf')
  ]

  last_removed = None
  for pdf_path in pdf_paths:
    last_removed = os.path.basename(pdf_path)
    os.remove(pdf_path)
  return last_removed
    


# Matches the answer marker with or without a space before the blank line and
# captures everything after it (DOTALL lets "." span newlines).
_HELPFUL_ANSWER_RE = re.compile(r'Helpful Answer: ?\n\n(.*)$', re.DOTALL)

# Appended after every answer in the combined report.
_ANSWER_SEPARATOR = "\n\n ************************************************************" + '\n' + '\n'


def _build_check_prompt(section, prompt_file):
  """Return the instruction prompt that evaluates *section* for *prompt_file*."""
  return f"""<s> [INST] You have two documents:

Template Document: "Standard_MoP_template.pdf"
Content Document: "{prompt_file}"
Evaluate whether the Content Document ("{prompt_file}") effectively integrates most of the necessary context and instructions for the "{section}" listed in the Template Document "Standard_MoP_template.pdf".

Instructions:

Review the "{section}" listed in the Template Document.
Assess whether the Content Document ("{prompt_file}") incorporates the necessary context and instructions for these points, whether they are explicitly stated under a section labeled "{section}" or implicitly covered elsewhere in the document.
Provide a "Yes" if the Content Document effectively integrates most of the points listed under the "{section}", considering both explicit and implicit coverage.
If the integration is lacking or incomplete, carefully reconsider whether the points are adequately addressed within the Content Document.
If you provide a "Yes," provide a detailed explanation highlighting how the Content Document integrates the {section} effectively. Discuss both explicit mentions and any implicit coverage that contributes to their implementation.
Make sure to accurately evaluate the alignment of the "{section}" provided in the Template Document with their presence or coverage in the Content Document. [/INST] </s> """


def _format_answers(results):
  """Combine raw chain outputs into one report string.

  For each result the text following the "Helpful Answer:" marker is used.
  A result without the marker is kept in full, so that one answer is never
  dropped just because only the other one carried the marker. When no result
  carries the marker, the raw results are concatenated unchanged.
  """
  sections = []
  found_marker = False
  for result in results:
    match = _HELPFUL_ANSWER_RE.search(result)
    if match:
      found_marker = True
      sections.append(match.group(1))
    else:
      sections.append(result)

  if not found_marker:
    return "".join(results)
  return "".join(text + _ANSWER_SEPARATOR for text in sections).strip()


def MOP(path):
  """Check the PDF(s) in *path* against the standard MoP template.

  The PDFs found in *path* are chunked, embedded and merged into the
  pre-built template index loaded from ``vector_database/``. The PDFs are
  then deleted from *path*, and a retrieval QA chain is asked two questions:
  whether the document covers the template's "Pre-Check  Procedures" and its
  "Post-Check  Procedures".

  Args:
    path: Folder containing the uploaded PDF document(s).

  Returns:
    The two answers as a single string, each followed by a separator line.
    If neither answer contains the "Helpful Answer:" marker, the two raw
    chain outputs are returned concatenated.

  Raises:
    ValueError: If no text chunks could be produced from the PDFs in *path*.
  """
  embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-m3")
  # NOTE(security): allow_dangerous_deserialization=True makes load_local
  # unpickle the stored index. That is only acceptable as long as
  # 'vector_database/' is a trusted artifact shipped with the app and never
  # user-supplied.
  vectors_template = FAISS.load_local('vector_database/', embeddings, allow_dangerous_deserialization=True)
  # Presumably the number of chunks in the pre-built template index, so the
  # retriever below can return every chunk - TODO confirm.
  template_chunk_count = 8

  docs = PyPDFDirectoryLoader(path).load()
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1200 , chunk_overlap=300)
  text_chunks2 = text_splitter.split_documents(docs)
  if not text_chunks2:
    # Nothing to index (no PDF, or a PDF without extractable text). Remove any
    # PDF anyway so it does not leak into the next request, then fail with a
    # clear message instead of an obscure error from the index build.
    delete_files_except(path)
    raise ValueError(f"No readable PDF text found in {path!r}; nothing to evaluate.")

  vector_Document = FAISS.from_documents(text_chunks2, embedding=embeddings)
  vectors_template.merge_from(vector_Document)
  # The uploaded PDFs are no longer needed once indexed; the returned file
  # name is what the prompts refer to as the Content Document.
  prompt_file = delete_files_except(path)

  repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1"
  llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature": 0.001, "max_new_tokens": 5000})
  retriever = vectors_template.as_retriever(search_type="similarity",search_kwargs={"k": template_chunk_count+len(text_chunks2)})
  qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

  result1=qa.run(_build_check_prompt("Pre-Check  Procedures", prompt_file))
  result2=qa.run(_build_check_prompt("Post-Check  Procedures", prompt_file))

  return _format_answers([result1, result2])
  

def process_file(fileobj):
  """Copy the uploaded file into the ``data`` folder and run the MoP check.

  Args:
    fileobj: The upload handed over by the Gradio "file" input. It may be a
      plain path string, a string-like object exposing ``.name``, or a
      temp-file wrapper exposing ``.name``; ``None`` when nothing was
      uploaded.

  Returns:
    The text produced by ``MOP`` for the ``data`` folder, or a short message
    when no file was uploaded.
  """
  if fileobj is None:
    return "No file was uploaded. Please upload a PDF document."

  destination_folder='data'
  # Resolve the on-disk path once: file-like uploads carry it in ``.name``,
  # while a plain string upload is the path itself.
  source_path = getattr(fileobj, "name", fileobj)
  file_name = os.path.basename(source_path)
  # The folder may not exist on a fresh deployment.
  os.makedirs(destination_folder, exist_ok=True)
  destination_path = os.path.join(destination_folder, file_name)
  shutil.copyfile(source_path, destination_path)
  print(file_name)
  return MOP(destination_folder)


# Web UI: a single file upload goes to process_file, whose return value is
# shown as plain text.
demo = gr.Interface(fn=process_file, inputs=["file"], outputs="text")
demo.launch()