Files changed (1) hide show
  1. app.py +76 -40
app.py CHANGED
@@ -9,64 +9,100 @@ import gradio as gr
9
  import re
10
  import shutil
11
  import glob
12
def delete_files_except(filename, folder_path):
    """Delete every regular file directly inside ``folder_path`` except one.

    Args:
        filename: Base name of the file that must be kept.
        folder_path: Directory whose direct children are examined
            (``glob`` with ``*``, so dot-files are not matched).

    Returns:
        Base name of the last file that was removed, or ``None`` when
        nothing was removed.
    """
    delfile = None
    for file in glob.glob(os.path.join(folder_path, '*')):
        # os.remove() raises on directories, so only plain files are candidates.
        if not os.path.isfile(file) or os.path.basename(file) == filename:
            continue
        os.remove(file)
        delfile = os.path.basename(file)
    return delfile
22
def MOP(path):
    """Ask the LLM whether the uploaded MoP covers the template's check procedures.

    Loads every PDF under ``path``, indexes the chunks in FAISS, removes all
    files except the template PDF (the removed file's name is used in the
    prompt), and runs a single RetrievalQA query.

    Args:
        path: Directory holding the template PDF and the uploaded PDF.

    Returns:
        The text following "Helpful Answer:" in the model output when that
        marker is present, otherwise the raw model output.
    """
    pdf_documents = PyPDFDirectoryLoader(path).load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=500)
    chunks = splitter.split_documents(pdf_documents)
    index = FAISS.from_documents(
        chunks,
        embedding=HuggingFaceEmbeddings(model_name="thenlper/gte-base"),
    )

    # Everything but the template is deleted; the name returned is the content document.
    prompt_file = delete_files_except('Dummy_standard MoP_template_new.pdf', path)

    llm = HuggingFaceHub(
        repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
        model_kwargs={"temperature": 0.1, "max_new_tokens": 2048},
    )
    # k equals the chunk count, so every indexed chunk is handed to the chain.
    retriever = index.as_retriever(
        search_type="similarity",
        search_kwargs={"k": len(chunks)},
    )
    qa = RetrievalQA.from_chain_type(
        llm=llm, chain_type="stuff", retriever=retriever, verbose=True
    )

    message = f"""<s> [INST] You have two documents:

Template Document: "Dummy_standard MoP_template_new.pdf"
Content Document: "{prompt_file}"

Your task is to determine whether the Content Document effectively incorporates the context and instructions of the Pre-Check Procedures and Post-Check Procedures specified in the Template Document, without the requirement for them to be under specified labels. The focus should be on the presence of the context and instructions rather than their exact placement.

Provide a "Yes" or "No" response indicating whether the Content Document accurately integrates the context and instructions of the Pre-Check Procedures and Post-Check Procedures as outlined in the Template Document.

Additionally, identify any missing elements related to the context and instructions of the Pre-Check Procedures and Post-Check Procedures if present, regardless of their placement within the Content Document.

Instructions:

Review the context and instructions of the Pre-Check Procedures and Post-Check Procedures detailed in the Template Document ("Dummy_standard MoP_template_new.pdf").
Assess whether the Content Document ("{prompt_file}") includes the necessary context and instructions for the Pre-Check Procedures and Post-Check Procedures, regardless of their specific placement or labeling.
Provide a "Yes" if the Content Document adequately integrates the context and instructions of the Pre-Check Procedures and Post-Check Procedures, or "No" if there are significant gaps or omissions.
If the answer is "No," specify any missing elements related to the context and instructions of the Pre-Check Procedures and Post-Check Procedures, emphasizing their importance in the Content Document.
Ensure careful consideration of the context and instructions provided in the Template Document ("Dummy_standard MoP_template_new.pdf") while evaluating the alignment of the Content Document ("{prompt_file}"). [/INST] </s>"""

    result = qa.run(message)
    found = re.search(r"Helpful Answer:\n\n(.*)", result, re.DOTALL)
    return found.group(1) if found else result
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
def process_file(fileobj):
    """Copy the uploaded file into the working folder and run the MoP check.

    Args:
        fileobj: The upload handed over by the Gradio input. Accepted as
            either a plain path string or an object exposing the path as
            ``.name`` (NOTE(review): which one arrives depends on the Gradio
            version / File component settings - confirm).

    Returns:
        Whatever ``MOP`` returns for the destination folder.
    """
    destination_folder = "data"
    # Resolve the source path once so basename and copy agree on it.
    source_path = getattr(fileobj, "name", fileobj)
    # copyfile does not create missing parent directories.
    os.makedirs(destination_folder, exist_ok=True)
    destination_path = os.path.join(destination_folder, os.path.basename(source_path))
    shutil.copyfile(source_path, destination_path)
    return MOP(destination_folder)
68
 
69
-
70
  demo = gr.Interface(
71
  fn=process_file,
72
  inputs=[
@@ -74,4 +110,4 @@ demo = gr.Interface(
74
  ],
75
  outputs="text"
76
  )
77
- demo.launch()
 
9
  import re
10
  import shutil
11
  import glob
12
def delete_files_except(folder_path):
    """Remove one regular file from ``folder_path`` and report its name.

    Only direct children matched by ``*`` are considered (dot-files are not
    matched). When several files are present the alphabetically first one
    is removed, so the choice is deterministic.

    Args:
        folder_path: Directory to take the file from.

    Returns:
        Base name of the removed file, or ``None`` if the folder holds no
        regular file.
    """
    candidates = sorted(
        entry
        for entry in glob.glob(os.path.join(folder_path, '*'))
        if os.path.isfile(entry)
    )
    if not candidates:
        # Nothing to delete; avoids the IndexError an empty folder used to cause.
        return None
    target = candidates[0]
    os.remove(target)
    return os.path.basename(target)
18
+
19
+
 
 
20
def _build_check_prompt(prompt_file, section):
    """Return the [INST] prompt asking about one template section ("Pre-Check" / "Post-Check")."""
    return f"""<s> [INST] You have two documents:

Template Document: "Standard_MoP_template.pdf"
Content Document: "{prompt_file}"
Evaluate whether the Content Document ("{prompt_file}") effectively integrates most of the necessary context and instructions for the "{section} Procedures" listed in the Template Document "Standard_MoP_template.pdf".

Instructions:

Review the "{section} Procedures" listed in the Template Document.
Assess whether the Content Document ("{prompt_file}") incorporates the necessary context and instructions for these points, whether they are explicitly stated under a section labeled "{section} Procedures" or implicitly covered elsewhere in the document.
Provide a "Yes" if the Content Document effectively integrates most of the points listed under the "{section} Procedures", considering both explicit and implicit coverage.
If the integration is lacking or incomplete, carefully reconsider whether the points are adequately addressed within the Content Document.
If you provide a "Yes," provide a detailed explanation highlighting how the Content Document integrates the {section} Procedures effectively. Discuss both explicit mentions and any implicit coverage that contributes to their implementation.
Make sure to accurately evaluate the alignment of the "{section} Procedures" provided in the Template Document with their presence or coverage in the Content Document. [/INST] </s>"""


def _join_helpful_answers(results):
    """Return the separator-joined answers, or ``None`` if no result has the answer marker."""
    separator = "\n\n ************************************************************"
    # One pattern covers both "Helpful Answer:\n\n" and "Helpful Answer: \n\n",
    # so a result can contribute at most one answer.
    found = [re.search(r'Helpful Answer: ?\n\n(.*)$', text, re.DOTALL) for text in results]
    if not any(found):
        return None
    # A result without the marker is kept verbatim instead of being dropped.
    pieces = [
        (match.group(1) if match else text) + separator
        for match, text in zip(found, results)
    ]
    return "".join(pieces).strip()


def MOP(path):
    """Check an uploaded MoP against the stored template's Pre-/Post-Check sections.

    Loads the saved template FAISS index, indexes the PDFs under ``path``,
    merges both indexes, removes one file from ``path`` (its name is used as
    the content document name in the prompts) and runs two RetrievalQA
    queries, one per section.

    Args:
        path: Directory containing the uploaded PDF.

    Returns:
        The extracted answers for both queries, each followed by a line of
        asterisks. If neither model output contains the "Helpful Answer:"
        marker, the two raw outputs are returned concatenated.
    """
    embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-m3")
    # NOTE(review): allow_dangerous_deserialization unpickles the stored index;
    # only safe while the embeddings folder is fully trusted.
    vectors_template = FAISS.load_local(
        '/content/drive/MyDrive/Innovation Themes /MoP/Template_embeddings',
        embeddings,
        allow_dangerous_deserialization=True,
    )
    # Presumably the number of chunks in the saved template index - TODO confirm.
    text_chunks1 = 8

    docs = PyPDFDirectoryLoader(path).load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=300)
    text_chunks2 = text_splitter.split_documents(docs)
    vector_Document = FAISS.from_documents(text_chunks2, embedding=embeddings)
    vectors_template.merge_from(vector_Document)

    # The upload is already indexed; remove it and keep its name for the prompts.
    prompt_file = delete_files_except(path)

    repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
    llm = HuggingFaceHub(
        repo_id=repo_id,
        model_kwargs={"temperature": 0.001, "max_new_tokens": 5000},
    )
    retriever = vectors_template.as_retriever(
        search_type="similarity",
        search_kwargs={"k": text_chunks1 + len(text_chunks2)},
    )
    qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

    result1 = qa.run(_build_check_prompt(prompt_file, "Pre-Check"))
    result2 = qa.run(_build_check_prompt(prompt_file, "Post-Check"))

    helpful_answer_texts_o = _join_helpful_answers([result1, result2])
    if helpful_answer_texts_o:
        return helpful_answer_texts_o

    return result1 + result2
96
+
97
+
98
def process_file(fileobj):
    """Copy the uploaded file into the check folder and run the MoP check.

    Args:
        fileobj: The upload handed over by the Gradio input. Accepted as
            either a plain path string or an object exposing the path as
            ``.name`` (NOTE(review): which one arrives depends on the Gradio
            version / File component settings - confirm).

    Returns:
        Whatever ``MOP`` returns for the destination folder.
    """
    destination_folder = "/content/check/"
    # Resolve the source path once so basename and copy agree on it.
    source_path = getattr(fileobj, "name", fileobj)
    # copyfile does not create missing parent directories.
    os.makedirs(destination_folder, exist_ok=True)
    destination_path = os.path.join(destination_folder, os.path.basename(source_path))
    shutil.copyfile(source_path, destination_path)
    return MOP(destination_folder)
104
 
105
+
106
  demo = gr.Interface(
107
  fn=process_file,
108
  inputs=[
 
110
  ],
111
  outputs="text"
112
  )
113
+ demo.launch()