File size: 5,606 Bytes
71ec341
 
 
 
 
 
 
 
 
 
 
e991ae0
 
 
 
 
 
 
 
71ec341
e991ae0
 
 
 
71ec341
 
 
e991ae0
 
 
 
 
 
 
 
71ec341
e991ae0
 
 
 
71ec341
e991ae0
 
 
 
b4185a6
e991ae0
 
 
71ec341
e991ae0
 
 
 
 
 
71ec341
e991ae0
71ec341
e991ae0
 
 
71ec341
 
 
e991ae0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71ec341
e991ae0
71ec341
 
 
 
 
e991ae0
71ec341
 
 
 
 
 
 
e991ae0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub
import os
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import gradio as gr
import re
import shutil
import glob
def delete_files_except(folder_path):
  """Delete one regular file from ``folder_path`` and return its base name.

  Despite the name, no file is spared: the first regular file that ``glob``
  reports directly inside ``folder_path`` is removed. Sub-directories are
  skipped because ``os.remove`` cannot delete them. ``glob`` gives no
  ordering guarantee, so when several files are present the choice of which
  one goes is arbitrary.

  Args:
    folder_path: Directory to look in (not searched recursively).

  Returns:
    The base name of the removed file, or ``None`` when the directory
    contains no regular file.

  Raises:
    OSError: If the selected file cannot be removed.
  """
  candidates = [
    entry for entry in glob.glob(os.path.join(folder_path, '*'))
    if os.path.isfile(entry)
  ]
  if not candidates:
    # Nothing to delete; report that instead of failing on candidates[0].
    return None
  target = candidates[0]
  # glob already returned the joined path, so remove it directly rather than
  # rebuilding it by string concatenation.
  os.remove(target)
  return os.path.basename(target)


def _build_check_prompt(section, prompt_file):
  """Return the prompt asking whether ``prompt_file`` covers ``section``.

  The text is wrapped in ``<s> [INST] ... [/INST] </s>`` markers and names the
  template document "Standard_MoP_template.pdf" as the reference.

  Args:
    section: Template section title exactly as it should appear in the
      prompt, e.g. "Pre-Check  Procedures".
    prompt_file: File name of the content document being evaluated.
  """
  return f"""<s> [INST] You have two documents:

Template Document: "Standard_MoP_template.pdf"
Content Document: "{prompt_file}"
Evaluate whether the Content Document ("{prompt_file}") effectively integrates most of the necessary context and instructions for the "{section}" listed in the Template Document "Standard_MoP_template.pdf".

Instructions:

Review the "{section}" listed in the Template Document.
Assess whether the Content Document ("{prompt_file}") incorporates the necessary context and instructions for these points, whether they are explicitly stated under a section labeled "{section}" or implicitly covered elsewhere in the document.
Provide a "Yes" if the Content Document effectively integrates most of the points listed under the "{section}", considering both explicit and implicit coverage.
If the integration is lacking or incomplete, carefully reconsider whether the points are adequately addressed within the Content Document.
If you provide a "Yes," provide a detailed explanation highlighting how the Content Document integrates the {section} effectively. Discuss both explicit mentions and any implicit coverage that contributes to their implementation.
Make sure to accurately evaluate the alignment of the "{section}" provided in the Template Document with their presence or coverage in the Content Document. [/INST] </s>"""


def _extract_helpful_answers(results):
  """Collect the text following each "Helpful Answer:" marker in ``results``.

  Every extracted answer is followed by a row of asterisks; the combined text
  is stripped of surrounding whitespace. Returns "" when no marker is found.
  """
  # The marker appears with or without a trailing space before the blank line.
  patterns = [
    r'Helpful Answer:\n\n(.*)$',
    r'Helpful Answer: \n\n(.*)$'
  ]
  separator = "\n\n ************************************************************"
  pieces = []
  for result in results:
    for pattern in patterns:
      match = re.search(pattern, result, re.DOTALL)
      if match:
        pieces.append(match.group(1) + separator)
  return "".join(pieces).strip()


def MOP(path):
  """Check the PDF(s) in ``path`` against the stored MoP template.

  Steps, in order:
    1. Load the pre-built template FAISS index from Google Drive.
    2. Load every PDF in ``path``, split it into overlapping chunks, embed the
       chunks and merge them into the template index.
    3. Remove one file from ``path`` via ``delete_files_except`` and use its
       name as the content-document name in the prompts.
    4. Ask the hosted Mixtral model, through a "stuff" RetrievalQA chain, one
       question about Pre-Check procedures and one about Post-Check
       procedures.

  Args:
    path: Directory containing the PDF document(s) to evaluate.

  Returns:
    The text after the "Helpful Answer:" markers of both responses, each
    followed by a row of asterisks. If neither response contains the marker,
    the two raw responses concatenated.
  """
  embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-m3")
  # NOTE(review): allow_dangerous_deserialization=True opts in to loading
  # pickled data; the index file at this path must stay trusted.
  vectors_template = FAISS.load_local('/content/drive/MyDrive/Innovation Themes /MoP/Template_embeddings', embeddings, allow_dangerous_deserialization=True)
  # Presumably the number of chunks stored in the template index -- TODO confirm.
  text_chunks1 = 8

  docs = PyPDFDirectoryLoader(path).load()
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=300)
  text_chunks2 = text_splitter.split_documents(docs)
  vector_Document = FAISS.from_documents(text_chunks2, embedding=embeddings)
  vectors_template.merge_from(vector_Document)

  # The documents are already embedded, so the file on disk can go; its name
  # is kept for the prompts.
  prompt_file = delete_files_except(path)

  repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
  llm = HuggingFaceHub(repo_id=repo_id, model_kwargs={"temperature": 0.001, "max_new_tokens": 5000})
  # k is the template chunk count plus every chunk of the new document.
  retriever = vectors_template.as_retriever(search_type="similarity", search_kwargs={"k": text_chunks1 + len(text_chunks2)})
  qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

  # The double space inside the section titles is intentional: it is part of
  # the prompt text sent to the model.
  result1 = qa.run(_build_check_prompt("Pre-Check  Procedures", prompt_file))
  result2 = qa.run(_build_check_prompt("Post-Check  Procedures", prompt_file))

  helpful_answer_texts_o = _extract_helpful_answers([result1, result2])
  if helpful_answer_texts_o:
    return helpful_answer_texts_o

  # No recognizable answer marker: fall back to the raw model output.
  return result1 + result2
  

def process_file(fileobj):
  """Copy an uploaded file into the working folder and run ``MOP`` on it.

  Args:
    fileobj: The upload from the Gradio "file" input. It may be a plain path
      string or an object whose ``name`` attribute holds the path of the
      uploaded temporary file; both are accepted.

  Returns:
    Whatever ``MOP`` returns for the working folder.
  """
  destination_folder = "/content/check/"
  # Resolve the source path once so the base name and the copy source agree,
  # whether the upload arrives as a path string or a file-like wrapper.
  source_path = getattr(fileobj, "name", fileobj)
  # copyfile does not create missing directories.
  os.makedirs(destination_folder, exist_ok=True)
  destination_path = os.path.join(destination_folder, os.path.basename(source_path))
  shutil.copyfile(source_path, destination_path)
  return MOP(destination_folder)


# Gradio front end: a single file upload goes to process_file and the
# returned text is shown as the output.
demo = gr.Interface(fn=process_file, inputs=["file"], outputs="text")
demo.launch()